# Scrapping ViableEDU's Linkedin

We want to know who is interacting with viableEDU:
* Persons reacting to content posted
* Information about persons interacting with the posts 
* List of followers

Sources:
* https://selenium-python.readthedocs.io/

In [33]:
# Libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import numpy as np
import pandas as pd
import re
import math as mt
from datetime import datetime

In [2]:
# Initialize Selenium
driver = webdriver.Chrome(executable_path='ChromeDriver/chromedriver.exe')

In [3]:
# Login into Linkedin
url = 'https://www.linkedin.com/login'
driver.get(url)

# Locate the elements to be filled
user = driver.find_element_by_id("username")
user.clear()
pswd = driver.find_element_by_id("password")
pswd.clear()

# Introduce your credentials
user.send_keys("")
pswd.send_keys("")
pswd.send_keys(Keys.RETURN)

## Scrapping ViableEDU Linkedin

In [4]:
# Scrapping ViableEDU Linkedin
url2 = 'https://www.linkedin.com/company/viableedu/'
driver.get(url2)

# Scroll down the page until it reachs the end
webElem = driver.find_element_by_tag_name('html')
i = 0
while (i < 20):
    time.sleep(0.5)
    webElem.send_keys(Keys.PAGE_DOWN)
    i = i + 1

In [5]:
# Close the chat window
driver.find_element_by_id('ember285').click()

### List of posts made by ViableEDU

In [6]:
# Get info from all the posts
postsList = driver.find_element_by_id('organization-feed')
posts = postsList.find_elements_by_class_name('occludable-update.ember-view')

In [57]:
postDF = pd.DataFrame(columns=['author','followers','dayPosted','description','interactionsTot','commentsTot','postNmbr'])
postInfo = {}
for i in range(len(posts)):
    # Who made the post
    postInfo['author'] = posts[i].find_element_by_class_name('feed-shared-actor__title').text 

    # Number of followers at the time of posting
    postInfo['followers'] = posts[i].find_element_by_class_name('feed-shared-actor__description.t-12.t-normal.t-black--light').text 

    # When it was posted
    postInfo['dayPosted'] = posts[i].find_element_by_class_name('feed-shared-actor__sub-description.t-12.t-normal.t-black--light').find_element_by_class_name('visually-hidden').text

    # Post description 
    try:
        postInfo['description'] = posts[i].find_element_by_class_name('break-words').text
    except:
        postInfo['description'] = ''        
        
    # How many reactions got the post
    n = posts[i].find_element_by_class_name('social-details-social-counts.ember-view').text.split('\n')
    if len(n) == 1:
        postInfo['interactionsTot'] = int(n[0])
        postInfo['commentsTot'] = 0
    elif 'Views' not in n[1]:
        postInfo['interactionsTot'] = int(n[0])
        postInfo['commentsTot'] = int(re.findall(r'\d+',n[1])[0])
    else:
        postInfo['interactionsTot'] = int(n[0])
        postInfo['commentsTot'] = 0
        
    # Post number (0 = most recent post)
    postInfo['postNmbr'] = i
    # Append results in dataframe
    postDF = postDF.append(pd.DataFrame.from_dict(postInfo,orient = 'index').T)

In [58]:
postDF['dayScrapped'] = datetime.today().strftime('%Y-%m-%d')
postDF.reset_index(inplace=True,drop=True)
postDF

Unnamed: 0,author,followers,dayPosted,description,interactionsTot,commentsTot,postNmbr,dayScrapped
0,ViableEdu,93 followers,16 hours ago,"Today, Chris White and ViableEdu discussed the...",14,0,0,2020-11-03
1,ViableEdu,93 followers,4 days ago,Chris White and ViableEdu sat down with Procli...,154,9,1,2020-11-03
2,ViableEdu,93 followers,1 week ago,"This morning, ViableEdu and Chris White broke ...",24,0,2,2020-11-03
3,ViableEdu,93 followers,5 days ago,"Today, Chris White and ViableEdu discussed the...",19,0,3,2020-11-03
4,ViableEdu,93 followers,1 week ago,"Today, ViableEdu and Chris White dug into WHY ...",22,1,4,2020-11-03
5,ViableEdu,93 followers,2 weeks ago,Week #2 of our Fintech Apprentice Program kick...,15,0,5,2020-11-03
6,ViableEdu,93 followers,2 weeks ago,A huge thanks to our first Coffee Chat network...,31,1,6,2020-11-03
7,ViableEdu,93 followers,2 weeks ago,In the advanced session for week one of the Vi...,22,1,7,2020-11-03
8,ViableEdu,93 followers,1 week ago,Thank you to this week's Coffee Chat networkin...,25,1,8,2020-11-03
9,ViableEdu,93 followers,3 weeks ago,Extremely excited to kick off our inaugural 8-...,29,0,9,2020-11-03


### List of people who react to the post

In [106]:
# Scroll down the page until it reachs the end
webElem = driver.find_element_by_tag_name('html')
i = 0
while (i < 20):
    time.sleep(0.5)
    webElem.send_keys(Keys.PAGE_UP)
    i = i + 1

In [107]:
likesDF = pd.DataFrame(columns=['name','jobTitle','linkedin','reaction','postNmbr'])
for j in range(len(posts)):
    time.sleep(0.5)
    # Open the window with list of reactions
    posts[j].find_element_by_class_name('reactions-icon.social-detail-social-counts__count-icon.reactions-icon__consumption--small').click()    
    time.sleep(0.5)
    # Scrolldown to show the whole list of reactions
    webElem = driver.find_element_by_xpath('//div[@class="artdeco-modal__content social-details-reactors-modal__content ember-view"]//a')
    n = postInfo['interactionsTot']
    x = 0
    while (x < mt.ceil(n/6)):
        time.sleep(0.5)
        webElem.send_keys(Keys.PAGE_DOWN)
        x = x + 1
    
    # Scrape the list of people
    peopleInteracting = driver.find_elements_by_class_name('artdeco-list__item')
    peopleDF = pd.DataFrame(columns=['name','jobTitle','linkedin','reaction','postNmbr'])
    peopleDict = {}
    time.sleep(0.5)
    
    # Loop to collect all the info
    for i in range(n):
        # Post number (0 = most recent post)
        peopleDict['postNmbr'] = j
        
        # Name
        try:
            peopleDict['name'] = peopleInteracting[i].find_element_by_class_name('artdeco-entity-lockup__title.ember-view').text
        except:
            peopleDict['name'] = ''

        # Job title
        try:
            peopleDict['jobTitle'] = peopleInteracting[i].find_element_by_class_name('artdeco-entity-lockup__caption.ember-view').text
        except:
            peopleDict['jobTitle'] = ''

        # Linkedin Link
        try:
            peopleDict['linkedin'] = peopleInteracting[i].find_element_by_class_name('link-without-hover-state.ember-view').get_attribute('href')
        except:
            peopleDict['linkedin'] = ''

        # Reaction to the post
        try:
            peopleDict['reaction'] = peopleInteracting[i].find_element_by_class_name('reactions-icon.social-details-reactors-tab-body__icon.reactions-icon__consumption--small').get_attribute('alt')
        except:
            peopleDict['reaction'] = ''
        
        peopleDF = peopleDF.append(pd.DataFrame.from_dict(peopleDict,orient = 'index').T)
    likesDF = likesDF.append(peopleDF)
    # Click close
    driver.find_element_by_class_name("artdeco-modal__dismiss.artdeco-button.artdeco-button--circle.artdeco-button--muted.artdeco-button--2.artdeco-button--tertiary.ember-view").click()

In [109]:
likesDF.reset_index(inplace=True,drop=True)
likesDF

Unnamed: 0,name,jobTitle,linkedin,reaction,postNmbr
0,Joel Brown-Christenson,Director of Business Development at ViableMkts,https://www.linkedin.com/in/joel-brown-christe...,LIKE,0
1,Joshua Watts,Student-Athlete at Brown University,https://www.linkedin.com/in/joshuawatts13?mini...,LIKE,0
2,Reese Gregory,Founder of RWG Software Designs | FinTech Appr...,https://www.linkedin.com/in/reese-gregory-4735...,LIKE,0
3,Chris White,CEO at ViableMkts & BondCliQ,https://www.linkedin.com/in/chris-white-bb3923...,LIKE,0
4,Justin Hartwig,Williams College Class of 2025,https://www.linkedin.com/in/justin-hartwig-a53...,LIKE,0
...,...,...,...,...,...
216,David Wang,Junior Developer (Consultant) at BondCliQ,https://www.linkedin.com/in/david-wang-b07a661...,LIKE,16
217,Christine Ji,Women's Advisory Program at RBC Capital Markets,https://www.linkedin.com/in/christine-ji?miniP...,LIKE,16
218,Larkin Ison III,Business Analyst at ViableMkts LLC,https://www.linkedin.com/in/larkin-ison-iii-19...,LIKE,16
219,James Justicz,Viable360 Consulting Analyst,https://www.linkedin.com/in/james-justicz-2972...,LIKE,16


In [127]:
# We get the list
personsOfInterest = likesDF.drop_duplicates('name').reset_index(drop = True)
personsOfInterest = personsOfInterest[['name','jobTitle','linkedin']]
personsOfInterest

Unnamed: 0,name,jobTitle,linkedin
0,Joel Brown-Christenson,Director of Business Development at ViableMkts,https://www.linkedin.com/in/joel-brown-christe...
1,Joshua Watts,Student-Athlete at Brown University,https://www.linkedin.com/in/joshuawatts13?mini...
2,Reese Gregory,Founder of RWG Software Designs | FinTech Appr...,https://www.linkedin.com/in/reese-gregory-4735...
3,Chris White,CEO at ViableMkts & BondCliQ,https://www.linkedin.com/in/chris-white-bb3923...
4,Justin Hartwig,Williams College Class of 2025,https://www.linkedin.com/in/justin-hartwig-a53...
...,...,...,...
57,Margaret Koulen,Part-Time Associate at OpenDoor,https://www.linkedin.com/in/margaret-koulen-a8...
58,Jay Baker-Johnson,MBA Candidate at Villanova University,https://www.linkedin.com/in/jaybakerjohnson?mi...
59,Katherine Hu,Intern at FinTEx,https://www.linkedin.com/in/katherine-hu-123b0...
60,Jenn Lu,Intern at Georgetown University Investment Office,https://www.linkedin.com/in/jenn-lu-b05969164?...


### List of Linkedin followers

In [129]:
# Open the window with list of followers
url3 = 'https://www.linkedin.com/company/67245387/admin/analytics/followers/?anchor=org-view-followers'
driver.get(url3)
driver.find_element_by_class_name('org-view-page-followers-module__modal-button.t-16.p1.t-bold.full-width').click()

In [136]:
# Scrolldown to show the whole list of reactions
webElem = driver.find_element_by_xpath('//div[@id="ember459"]//a')
n = int(re.findall(r'\d+',postDF.loc[0,'followers'])[0])
x = 0
while (x < mt.ceil(n/6)):
    time.sleep(0.5)
    webElem.send_keys(Keys.PAGE_DOWN)
    x = x + 1

In [183]:
# Download all the list
followersList = driver.find_elements_by_css_selector('#ember459 > table > tbody > tr')

followersDF = pd.DataFrame(columns=['name','linkedin','jobTitle','monthFollowed'])
followers = {}
for i in range(len(followersList)):
    time.sleep(1)
    try:
        followers['name'] = followersList[i].find_element_by_class_name('artdeco-entity-lockup__title.ember-view').text
    except:
        followers['name'] = ''

    try:
        followers['linkedin'] = followersList[i].find_element_by_class_name('ember-view.link-without-hover-visited').get_attribute('href')
    except:
        followers['linkedin'] = ''

    try:
        followers['jobTitle'] = followersList[i].find_element_by_class_name('artdeco-entity-lockup__caption.ember-view').text
    except:
        followers['jobTitle'] = ''

    try:
        followers['monthFollowed'] = followersList[i].find_element_by_class_name('t-14.t-black--light').text.split('\n')[0]
    except:
        followers['monthFollowed'] = ''
    followersDF = followersDF.append(pd.DataFrame.from_dict(followers,orient = 'index').T)

In [185]:
followersDF.reset_index(inplace=True,drop=True)
followersDF

Unnamed: 0,name,linkedin,jobTitle,monthFollowed
0,David Holden,https://www.linkedin.com/in/david-holden-41469...,Student at The University of Michigan,November 2020
1,"Larkin Ison, Jr.",https://www.linkedin.com/in/larkin-ison-jr/,Business Units Techical Training Manager,November 2020
2,Brian Lane,https://www.linkedin.com/in/brianjlane/,Founder & CEO at FixtHub,October 2020
3,Timothy Oberweger,https://www.linkedin.com/in/ilovetitle/,"Vice President, Sr. Business Development Offic...",October 2020
4,Michael Jordan Pilgreen,https://www.linkedin.com/in/michael-jordan-pil...,Multi-Disciplined Wealth Advisor and Analyst,October 2020
...,...,...,...,...
88,Eamon Garrity-Rokous,https://www.linkedin.com/in/eamon-garrity-roko...,Consulting Business Analyst at ViableMkts LLC ...,August 2020
89,James Justicz,https://www.linkedin.com/in/james-justicz-2972...,Viable360 Consulting Analyst,August 2020
90,Margaret Koulen,https://www.linkedin.com/in/margaret-koulen-a8...,Part-Time Associate at OpenDoor,August 2020
91,Jamie Kaplan,https://www.linkedin.com/in/jamie-kaplan-169b9...,BondCliQ & ViableMkts,August 2020


### Join between followers list and people who react to our posts

In [189]:
fullList = followersDF.append(personsOfInterest)
fullList.drop_duplicates('name',inplace = True)
fullList.reset_index(drop = True,inplace = True)
fullList

Unnamed: 0,name,linkedin,jobTitle,monthFollowed
0,David Holden,https://www.linkedin.com/in/david-holden-41469...,Student at The University of Michigan,November 2020
1,"Larkin Ison, Jr.",https://www.linkedin.com/in/larkin-ison-jr/,Business Units Techical Training Manager,November 2020
2,Brian Lane,https://www.linkedin.com/in/brianjlane/,Founder & CEO at FixtHub,October 2020
3,Timothy Oberweger,https://www.linkedin.com/in/ilovetitle/,"Vice President, Sr. Business Development Offic...",October 2020
4,Michael Jordan Pilgreen,https://www.linkedin.com/in/michael-jordan-pil...,Multi-Disciplined Wealth Advisor and Analyst,October 2020
...,...,...,...,...
109,Emeka O.,https://www.linkedin.com/in/emekaogbonnna?mini...,Helping Brands Accelerate Growth,
110,Daniel Green,https://www.linkedin.com/in/daniel-green-a5468...,Customer Success Manager at DeepCrawl,
111,Kanani Briggs,https://www.linkedin.com/in/kananibriggs?miniP...,Agent Partner at The Curtin Team - Building we...,
112,Tracy Robinton Huser,https://www.linkedin.com/in/tracy-robinton-hus...,Director at VIVALDI_,


### Scrape informatio from each Linkedin profile

In [None]:
# To be done 