In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# used to supress display of warnings
import warnings

# os is used to provide a way of using operating system dependent functionality
# We use it for setting working folder
import os

# Pandas is used for data manipulation and analysis
import pandas as pd 

# Numpy is used for large, multi-dimensional arrays and matrices, along with mathematical operators on these arrays
import numpy as np

from google.colab import files
import time

# Matplotlib is a data visualization library for 2D plots of arrays, built on NumPy arrays 
# and designed to work with the broader SciPy stack
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import pyplot

# Seaborn is based on matplotlib, which aids in drawing attractive and informative statistical graphics.
import seaborn as sns
import tensorflow 
print(tensorflow.__version__)

2.5.0


In [3]:
# suppress display of warnings
warnings.filterwarnings('ignore')

# display all dataframe columns
pd.options.display.max_columns = None

# to set the limit to 3 decimals
pd.options.display.float_format = '{:.7f}'.format

# display all dataframe rows
pd.options.display.max_rows = None

In [4]:
#Read the data scraped
filepath = "/content/drive/MyDrive/AI-ML/Data/Ent_Articles_combined_withAuthor12June r2.csv"
df = pd.read_csv(filepath)
df.head()

Unnamed: 0.1,Unnamed: 0,Article URL,Headline,Article Text,Sub Domain,Domain,Author
0,0.0,https://www.entrepreneur.com/article/370874,\n\t\t\t\tIs Launching a New Brand the Right M...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Branding,Marketing,Melissa Packham
1,1.0,https://www.entrepreneur.com/article/372254,\n\t\t\t\t3 Reasons Simple Isn't Always Better...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Branding,Marketing,Zaheer Dodhia
2,2.0,https://www.entrepreneur.com/article/368356,\n\t\t\t\tFrom Idea to Revenue: A Six-Step For...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Launching a Business,Marketing,Jessica O'Connell
3,3.0,https://www.entrepreneur.com/article/373825,\n\t\t\t\tAvoid This Common Mistake When Writi...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Writing a Book,Marketing,R. Paulo Delgado
4,4.0,https://www.entrepreneur.com/article/372679,\n\t\t\t\tIs it Time to Say RIP to RFPs?\n\t\t\t,"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t9,...",Entrepreneurs,Marketing,Heather Ripley


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32108 entries, 0 to 32107
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    30628 non-null  float64
 1   Article URL   30060 non-null  object 
 2   Headline      32108 non-null  object 
 3   Article Text  32108 non-null  object 
 4   Sub Domain    30628 non-null  object 
 5   Domain        32108 non-null  object 
 6   Author        32108 non-null  object 
dtypes: float64(1), object(6)
memory usage: 1.7+ MB


In [6]:
df.shape

(32108, 7)

In [7]:
#check any na
df.isna().sum()

Unnamed: 0      1480
Article URL     2048
Headline           0
Article Text       0
Sub Domain      1480
Domain             0
Author             0
dtype: int64

In [8]:
#NO URL available
df[df['Article URL'].isna()]

Unnamed: 0.1,Unnamed: 0,Article URL,Headline,Article Text,Sub Domain,Domain,Author
19694,,,Will American Brands Be a Casualty of War?,In a recent op-ed piece for the Sunday London ...,,Marketing,\n Sean Silverthorne
30061,2073.0,,\n\t\t\t\tAll 13 Tech Stocks on the Entreprene...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t1...,Entrepreneur Index,Technology,Andrew Osterland
30062,2074.0,,\n\t\t\t\tMake Your Mac a Streamlined Producti...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t1...,Productivity,Technology,Entrepreneur Store
30063,2082.0,,\n\t\t\t\tWhy the Tide Is Turning for the Ener...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t8...,Energy,Technology,Daniel Saunders
30064,2083.0,,\n\t\t\t\tHow to Make Your Video Meetings More...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t7...,Video Conferences,Technology,StackCommerce
30065,2084.0,,\n\t\t\t\tThere's No Such Thing As an 'Ideal' ...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t7...,Innovation,Technology,Sanyogita Shamsunder
30066,2085.0,,\n\t\t\t\tThe Most Important Blockchain Organi...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t7...,Blockchain,Technology,Cynthia Johnson
30067,2086.0,,"\n\t\t\t\tFor Europe to Lead in Blockchain, it...",\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t4...,Blockchain,Technology,Albert Isola M.P
30068,2087.0,,\n\t\t\t\tHow to Encourage Young Girls to Purs...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t2...,Enterprising women,Technology,The Conversation
30069,2088.0,,"\n\t\t\t\tWhy Software Projects Fail, and the ...",\n\n\n\t\t\t\t\t\t\t\tFebruary\n\t\t\t\t\t\t\t...,software development,Technology,Thomas Smale


In [9]:
# Only keep records where Domain is non blank 
df = df[df['Domain'].notna()]

In [10]:
df.isna().sum()

Unnamed: 0      1480
Article URL     2048
Headline           0
Article Text       0
Sub Domain      1480
Domain             0
Author             0
dtype: int64

In [11]:
#Check Unique Values of Domain
df['Domain'].unique()

array(['Marketing', 'Finance', 'Leadership', 'Strategy', 'Technology'],
      dtype=object)

In [12]:
#Unique Values of Doamin
df['Domain'].value_counts()

Marketing     7921
Technology    7223
Leadership    6563
Strategy      5463
Finance       4938
Name: Domain, dtype: int64

In [13]:
#cleaning the text
from bs4 import BeautifulSoup
import re

def str_strip(text):
    soup = BeautifulSoup(text, "html.parser") 
    text = re.sub('\[[^]]*\]', '', soup.get_text())
    text = re.sub('\nAuthor Abstract\n', '', text)
    text = re.sub('(?=Paper Information)(?s)(.*$)', '', text)
    text = text.strip()
    text = re.sub('\n', ' ', text)
    text = re.sub('\t', ' ', text)
    text = re.sub(u'\xa0', u' ', text)
    text = re.sub('—', ' ', text)  
    pattern=r"[^a-zA-z0-9\s,.;!()&$\-']"
    text=re.sub(pattern,'',text)
    text = re.sub('/\s\s+/g', ' ', text) #remove multiple spaces
    text = re.sub('/  +/g', ' ',text)
    read_pattern1 =r'\d min read'
    read_pattern2 =r'min read'
    translation_pattern = r'This article was translated from our Spanish edition using AI technologies. Errors may exist due to this process.'
    opinion_pattern = 'Opinions expressed by Entrepreneur contributors are their own.'
    date_pattern = r'^(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d+)\,\s+(\d+)\s+' 
    text=re.sub(read_pattern1,'',text)
    text=re.sub(read_pattern2,'',text)
    text=re.sub(translation_pattern,'',text)
    text=re.sub(opinion_pattern,'',text)
    text=re.sub(date_pattern,'',text)
    return text


In [14]:
df['Cleaned_Article']=df['Article Text'].apply(str_strip)

In [15]:
df['Cleaned_Headline']=df['Headline'].apply(str_strip)

In [16]:
df['Cleaned_Author']=df['Author'].apply(str_strip)

In [17]:
df['Cleaned_Author']=df['Author'].apply(str_strip)

In [18]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,Article URL,Headline,Article Text,Sub Domain,Domain,Author,Cleaned_Article,Cleaned_Headline,Cleaned_Author
0,0.0,https://www.entrepreneur.com/article/370874,\n\t\t\t\tIs Launching a New Brand the Right M...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Branding,Marketing,Melissa Packham,One of the most common challenges I see client...,Is Launching a New Brand the Right Move for Yo...,Melissa Packham
1,1.0,https://www.entrepreneur.com/article/372254,\n\t\t\t\t3 Reasons Simple Isn't Always Better...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Branding,Marketing,Zaheer Dodhia,"Lets say that, like me, youve been thinking ab...",3 Reasons Simple Isn't Always Better When It C...,Zaheer Dodhia
2,2.0,https://www.entrepreneur.com/article/368356,\n\t\t\t\tFrom Idea to Revenue: A Six-Step For...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Launching a Business,Marketing,Jessica O'Connell,"If youre like most entrepreneurs, you may have...",From Idea to Revenue A Six-Step Formula to Lau...,Jessica O'Connell
3,3.0,https://www.entrepreneur.com/article/373825,\n\t\t\t\tAvoid This Common Mistake When Writi...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t10...,Writing a Book,Marketing,R. Paulo Delgado,I had an interesting project come across my de...,Avoid This Common Mistake When Writing Your Fi...,R. Paulo Delgado
4,4.0,https://www.entrepreneur.com/article/372679,\n\t\t\t\tIs it Time to Say RIP to RFPs?\n\t\t\t,"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t9,...",Entrepreneurs,Marketing,Heather Ripley,"In a world where personal touch matters, why a...",Is it Time to Say RIP to RFPs,Heather Ripley
5,5.0,https://www.entrepreneur.com/article/373603,\n\t\t\t\t4‌ ‌Lessons‌ ‌Nonfiction‌ ‌Writers‌ ...,"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t9,...",Writing a Book,Marketing,R. Paulo Delgado,I just finished reading Matthew McConaughey's ...,4 Lessons Nonfiction Writers Can Learn From Ma...,R. Paulo Delgado
6,12.0,https://www.entrepreneur.com/article/373832,\n\t\t\t\tThe 5 Cs of Logo Design That Will Al...,"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t8,...",Design,Marketing,Zaheer Dodhia,How do you solve a problem like a boring logo ...,The 5 Cs of Logo Design That Will Always Stand...,Zaheer Dodhia
7,13.0,https://www.entrepreneur.com/article/371210,\n\t\t\t\tThe 3 Questions That Will Help You D...,"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t8,...",Branding,Marketing,Raj Girn,What does the word branding mean to you and yo...,The 3 Questions That Will Help You Define Your...,Raj Girn
8,14.0,https://www.entrepreneur.com/article/374003,\n\t\t\t\t40 million people made use of some o...,"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t8,...",Ecommerce,Marketing,Entrepreneur en Español,Digital payments in Latin America grew in a wa...,40 million people made use of some online paym...,Entrepreneur en Espaol
9,15.0,https://www.entrepreneur.com/article/373995,\n\t\t\t\tHow to Sell More on Amazon Prime Day...,"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t8,...",Amazon Prime Day,Marketing,Entrepreneur en Español,Dates for Amazon Day 2021 have already been an...,How to Sell More on Amazon Prime Day 2021,Entrepreneur en Espaol


In [19]:
df['Article Text'][8]

"\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t8, 2021\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t3 min read\n\t\t\t\t\t\t\t\t\t\t\t\t\t\nThis article was translated from our Spanish edition using AI technologies. Errors may exist due to this process.\n\n\nDigital payments in Latin America grew in a way that we could never imagine. Sure, the health crisis has to be taken into account, but the use of e-commerce has skyrocketed beyond any possible prediction. At least that reflects the recent study of Mercado Pago Trends of online payments in Latam one year after COVID-19 . The analysis made between April 2020 and March 2021, contemplates the use of two online payment methods: the payment processor that is used in millions of web pages and the use of the payment link, used to charge through networks social or chats. This analysis, carried out in seven countries in the region ( Argentina, Brazil, Chile, Colombia, Mexico, Peru and Uruguay ) between April 2020 and March 2021, showed that 

In [20]:
df['Cleaned_Article'][8]

"Digital payments in Latin America grew in a way that we could never imagine. Sure, the health crisis has to be taken into account, but the use of e-commerce has skyrocketed beyond any possible prediction. At least that reflects the recent study of Mercado Pago Trends of online payments in Latam one year after COVID-19 . The analysis made between April 2020 and March 2021, contemplates the use of two online payment methods the payment processor that is used in millions of web pages and the use of the payment link, used to charge through networks social or chats. This analysis, carried out in seven countries in the region ( Argentina, Brazil, Chile, Colombia, Mexico, Peru and Uruguay ) between April 2020 and March 2021, showed that more than 40 million people made use of an online payment ,   Brazil being the country with the most new users with 21 million, followed by Argentina with 8 million and Mexico with 4 million . Chile and Colombia added 3 million each, while Peru added a millio

In [21]:
df['Cleaned_Author'].nunique()

5868

In [22]:
#Lets look at authors with highest # of articles
df["Cleaned_Author"].value_counts().nlargest(30)

Unknown                   870
Entrepreneur Store        699
Catherine Clifford        527
Kim Lachance Shandrow     515
Reuters                   508
Nina Zipkin               426
Jason Fell                408
Geoff Weiss               354
Laura Entis               262
John Rampton              240
Entrepreneur en Espaol    210
Rose Leadem               207
John Boitnott             191
Brian Patrick Eha         179
Entrepreneur Staff        155
Lydia Belanger            154
Ivan Misner               153
Jonathan Blum             150
Jason Feifer              147
Heather R. Huhman         146
Mikal E. Belicove         146
Jonathan Long             141
Steve Tobak               141
Andrew Osterland          138
Lewis Howes               137
Gwen Moran                136
Thomas Smale              133
Jim Joseph                133
Ray Hennessey             132
Andrew Medal              130
Name: Cleaned_Author, dtype: int64

In [23]:
#List of authors who are not available or Unknown
missing_authors_names = ['Unknown','Reuters','Entrepreneur Store','Entrepreneur Staff','Entrepreneur en Espaol']
#Let's see how many records would be unavailable per author
df[df.Cleaned_Author.isin(missing_authors_names)]['Cleaned_Author'].value_counts()

Unknown                   870
Entrepreneur Store        699
Reuters                   508
Entrepreneur en Espaol    210
Entrepreneur Staff        155
Name: Cleaned_Author, dtype: int64

In [24]:
count_missing_author = df[df.Cleaned_Author.isin(missing_authors_names)]['Cleaned_Author'].value_counts().sum()
print("\n Count of Records with Misisng Authors  needs to be dropped  :", count_missing_author)


 Count of Records with Misisng Authors  needs to be dropped  : 2442


In [25]:
# Lets remove records of these missing authors
df_clean = df[~df.Cleaned_Author.isin(missing_authors_names)]
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 29666 entries, 0 to 32107
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        28358 non-null  float64
 1   Article URL       27872 non-null  object 
 2   Headline          29666 non-null  object 
 3   Article Text      29666 non-null  object 
 4   Sub Domain        28358 non-null  object 
 5   Domain            29666 non-null  object 
 6   Author            29666 non-null  object 
 7   Cleaned_Article   29666 non-null  object 
 8   Cleaned_Headline  29666 non-null  object 
 9   Cleaned_Author    29666 non-null  object 
dtypes: float64(1), object(9)
memory usage: 2.5+ MB


In [26]:
#Lets check number of words in  Articles and store in a column
df_clean['Word_Count']=df_clean['Cleaned_Article'].apply(lambda x:len(x.split(' ')))

In [27]:
#Lets see how many artiicles have very few words say 50 or less
notext_df = df_clean[df_clean['Word_Count'] < 50]
notext_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35 entries, 1111 to 26525
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        20 non-null     float64
 1   Article URL       35 non-null     object 
 2   Headline          35 non-null     object 
 3   Article Text      35 non-null     object 
 4   Sub Domain        20 non-null     object 
 5   Domain            35 non-null     object 
 6   Author            35 non-null     object 
 7   Cleaned_Article   35 non-null     object 
 8   Cleaned_Headline  35 non-null     object 
 9   Cleaned_Author    35 non-null     object 
 10  Word_Count        35 non-null     int64  
dtypes: float64(1), int64(1), object(9)
memory usage: 3.3+ KB


In [28]:
#Lets see how many words per articles these small ones have
notext_df.groupby('Word_Count').size()

Word_Count
1     12
3      1
7      5
9      1
10     1
11     1
13     2
25     1
26     1
28     1
30     2
31     2
35     1
38     1
45     1
47     2
dtype: int64

In [29]:
#Only keep records with Cleaned Test length >50 words
df_clean = df_clean[df_clean['Word_Count'] >50]

In [30]:
#Categorize per Size of articles
df_clean['Article_Size'] = df_clean['Word_Count'].apply(lambda x: 'Small' if x <= 500 else ('Medium' if x <1000 else 'Large'))

In [31]:
# Count of Articles Size
df_clean.Article_Size.value_counts()

Medium    16576
Large      6891
Small      6164
Name: Article_Size, dtype: int64

In [32]:
#Domainwise count
df_clean.groupby(['Domain','Article_Size']).size()

Domain      Article_Size
Finance     Large           1031
            Medium          2553
            Small           1016
Leadership  Large           1736
            Medium          3889
            Small            642
Marketing   Large           1632
            Medium          4572
            Small           1263
Strategy    Large           1578
            Medium          3007
            Small            620
Technology  Large            914
            Medium          2555
            Small           2623
dtype: int64

In [33]:
grouped_df = df_clean.groupby(['Domain','Article_Size']).size().reset_index(name="Count")
grouped_df

Unnamed: 0,Domain,Article_Size,Count
0,Finance,Large,1031
1,Finance,Medium,2553
2,Finance,Small,1016
3,Leadership,Large,1736
4,Leadership,Medium,3889
5,Leadership,Small,642
6,Marketing,Large,1632
7,Marketing,Medium,4572
8,Marketing,Small,1263
9,Strategy,Large,1578


In [34]:
# Shuffle data before splitting
data_shuffled = df_clean.sample(frac=1, random_state=42) # shuffle with random_state=42 for reproducibility
data_shuffled.head()

Unnamed: 0.1,Unnamed: 0,Article URL,Headline,Article Text,Sub Domain,Domain,Author,Cleaned_Article,Cleaned_Headline,Cleaned_Author,Word_Count,Article_Size
911,1458.0,https://www.entrepreneur.com/article/333023,\n\t\t\t\tThe 21st Century Affluent Woman and ...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t24...,Marketing,Marketing,Dan S. Kennedy,The following excerpt is from Dan S. Kennedys ...,The 21st Century Affluent Woman and Her Market...,Dan S. Kennedy,1063,Large
28318,5129.0,https://www.entrepreneur.com/article/355070,\n\t\t\t\tSmart Home: Blossom Masters The Wate...,\n\n\n\t\t\t\t\t\t\t\tMarch\n\t\t\t\t\t\t\t\t6...,Product Design,Technology,Tamara Clarke,"You're reading Entrepreneur Middle East, an in...",Smart Home Blossom Masters The Waterworks,Tamara Clarke,167,Small
20586,1152.0,https://www.entrepreneur.com/article/343145,\n\t\t\t\tTips to Follow When Re-Fitting Your ...,\n\n\n\t\t\t\t\t\t\t\tAugust\n\t\t\t\t\t\t\t\t...,Business Model,Strategy,Tanner Simkins,"In the midst of a global health crisis, it can...",Tips to Follow When Re-Fitting Your Business M...,Tanner Simkins,852,Medium
13643,2496.0,https://www.entrepreneur.com/article/321218,\n\t\t\t\t9 Ways to Combat Decision Fatigue\n\...,\n\n\n\t\t\t\t\t\t\t\tNovember\n\t\t\t\t\t\t\t...,Cultivating Emotional Intelligence,Leadership,Deep Patel,"Making decisions, even small, seemly harmless ...",9 Ways to Combat Decision Fatigue,Deep Patel,1478,Large
4996,7914.0,https://www.entrepreneur.com/article/235235,\n\t\t\t\tThe 3 Fundamentals for a Successful ...,\n\n\n\t\t\t\t\t\t\t\tJune\n\t\t\t\t\t\t\t\t27...,Rebranding,Marketing,Greg Liberman,"JDate, the premier community for Jewish single...",The 3 Fundamentals for a Successful Rebrand,Greg Liberman,769,Medium


In [37]:
#Split into Train and test, keep test data only for final prediction
from sklearn.model_selection import train_test_split
train, test = train_test_split(data_shuffled, test_size=0.1) #10% data saved for final test

In [39]:
df_clean.shape, train.shape , test.shape

((29631, 12), (26667, 12), (2964, 12))

In [40]:
df_clean.groupby('Domain').size()

Domain
Finance       4600
Leadership    6267
Marketing     7467
Strategy      5205
Technology    6092
dtype: int64

In [41]:
train.groupby('Domain').size()

Domain
Finance       4126
Leadership    5641
Marketing     6741
Strategy      4664
Technology    5495
dtype: int64

In [42]:
test.groupby('Domain').size()

Domain
Finance       474
Leadership    626
Marketing     726
Strategy      541
Technology    597
dtype: int64

In [43]:
#export file and save as CSV
df_clean.to_csv("/content/drive/MyDrive/AI-ML/Data/Final_Articles_Data_25June.csv")
train.to_csv("/content/drive/MyDrive/AI-ML/Data/Final_Articles_Train_25June.csv")
test.to_csv("/content/drive/MyDrive/AI-ML/Data/Final_Articles_Test_25June.csv")

In [87]:
#Download the file to the local machine
#files.download("/content/drive/MyDrive/AI-ML/Data/Final_Articles_Data_25June.csv")
#files.download("/content/drive/MyDrive/AI-ML/Data/Final_Articles_Train_25June.csv")
#files.download("/content/drive/MyDrive/AI-ML/Data/Final_Articles_Test_25June.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>