In [1]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import pandas as pd
import os, sys

import fire

In [2]:
#%%writefile ../pyscrap_url.py

def simple_get(url):
    """
    Attempts to get the content at `url` by making an HTTP GET request.
    If the content-type of response is some kind of HTML/XML, return the
    text content, otherwise return None.
    """
    try:
        with closing(get(url, stream=True)) as resp:
            if is_good_response(resp):
                return resp.content  #.encode(BeautifulSoup.original_encoding)
            else:
                return None

    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.
    """
    content_type = resp.headers['Content-Type'].lower()
    return (resp.status_code == 200 
            and content_type is not None 
            and content_type.find('html') > -1)


def log_error(e):
    """
    It is always a good idea to log errors. 
    This function just prints them, but you can
    make it do anything.
    """
    print(e)
    
def get_elements(url, tag='',search={}, fname=None):
    """
    Downloads a page specified by the url parameter
    and returns a list of strings, one per tag element
    """
    
    if isinstance(url,str):
        response = simple_get(url)
    else:
        #if already it is a loaded html page
        response = url

    if response is not None:
        html = BeautifulSoup(response, 'html.parser')
        
        res = []
        if tag:    
            for li in html.select(tag):
                for name in li.text.split('\n'):
                    if len(name) > 0:
                        res.append(name.strip())
                       
                
        if search:
            soup = html            
            
            
            r = ''
            if 'find' in search.keys():
                print('findaing',search['find'])
                soup = soup.find(**search['find'])
                r = soup

                
            if 'find_all' in search.keys():
                print('findaing all of',search['find_all'])
                r = soup.find_all(**search['find_all'])
   
            if r:
                for x in list(r):
                    if len(x) > 0:
                        res.extend(x)
            
        return res

    # Raise an exception if we failed to get any data from the url
    raise Exception('Error retrieving contents at {}'.format(url))    
    
    
if get_ipython().__class__.__name__ == '__main__':
    fire(get_tag_elements)

In [5]:
celeb = get_elements('http://www.travelstart.com.ng/blog/the-50-best-nigerians-to-follow-on-twitter/', tag='a')
celeb

['',
 'About',
 'Book Flights',
 'Book Hotels',
 'Share',
 'Tweet',
 'Subscribe',
 '@DONJAZZY',
 '@Elrufai',
 '@UtomiPat',
 '@JuliusAgwu1',
 '@ALIBABAGCFR',
 '@AYCOMEDIAN',
 '@omojuwa',
 '@Obiasika',
 '@Nedunaija',
 '@OloriSupergal',
 '@Gidi_Traffic',
 '@Fly_Naija',
 '@basket_mouth',
 '@PeterPsquare',
 '@BankyW',
 '@toyosirise',
 '@toluogunlesi',
 '@eggheader',
 '@Realseunkuti',
 '@TiwaSavage',
 '@wandecoal',
 '@lindaikeji',
 '@kunleafolayan',
 '@feladurotoye',
 '@ikhide',
 '@tejucole',
 '@Realomosexy',
 '@darey',
 '@bellanaija',
 '@AbangMercy',
 '@iamdbanj',
 '@Itweetnaija',
 '@9ja_olofofo',
 '@DEMOLAEXPOZE',
 '@GbemiOO',
 '@seunfakze',
 '@blcompere',
 '@funkeakindele',
 '@abinibi',
 '@uchejombo',
 '@Moabudu',
 '@kemiadetiba',
 '@ayorotimi',
 '@MI_Abaga',
 '@ritaUdominic',
 '@obyezeks',
 '@seunosewa',
 '@OsaGz',
 'Oluwakemi Ojo',
 'Share',
 'Tweet',
 'Subscribe',
 'Travelstart stuff',
 'art',
 'follow',
 'Followers',
 'Naija',
 'Nigeria',
 'Twitter',
 'permalink',
 'Reply',
 'Reply',


In [13]:
for i in celeb[55:]:
    celeb.remove(i)  
print(celeb)


['', 'About', 'Book Flights', '@DONJAZZY', '@Elrufai', '@UtomiPat', '@JuliusAgwu1', '@ALIBABAGCFR', '@AYCOMEDIAN', '@omojuwa', '@Obiasika', '@Nedunaija', '@OloriSupergal', '@Gidi_Traffic', '@Fly_Naija', '@basket_mouth', '@PeterPsquare', '@BankyW', '@toyosirise', '@toluogunlesi', '@eggheader', '@Realseunkuti', '@TiwaSavage', '@wandecoal', '@lindaikeji', '@kunleafolayan', '@feladurotoye', '@ikhide', '@tejucole', '@Realomosexy', '@darey', '@bellanaija', '@AbangMercy', '@iamdbanj', '@Itweetnaija', '@9ja_olofofo', '@DEMOLAEXPOZE', '@GbemiOO', '@seunfakze', '@blcompere', '@funkeakindele', '@abinibi', '@uchejombo', '@Moabudu', '@kemiadetiba', '@ayorotimi', '@MI_Abaga', '@ritaUdominic', '@obyezeks', '@seunosewa', '@OsaGz', 'Share', 'Tweet', 'Subscribe', 'Book Hotels']


In [14]:
for i in celeb[0:3]:
    celeb.remove(i)
print(celeb)

['@DONJAZZY', '@Elrufai', '@UtomiPat', '@JuliusAgwu1', '@ALIBABAGCFR', '@AYCOMEDIAN', '@omojuwa', '@Obiasika', '@Nedunaija', '@OloriSupergal', '@Gidi_Traffic', '@Fly_Naija', '@basket_mouth', '@PeterPsquare', '@BankyW', '@toyosirise', '@toluogunlesi', '@eggheader', '@Realseunkuti', '@TiwaSavage', '@wandecoal', '@lindaikeji', '@kunleafolayan', '@feladurotoye', '@ikhide', '@tejucole', '@Realomosexy', '@darey', '@bellanaija', '@AbangMercy', '@iamdbanj', '@Itweetnaija', '@9ja_olofofo', '@DEMOLAEXPOZE', '@GbemiOO', '@seunfakze', '@blcompere', '@funkeakindele', '@abinibi', '@uchejombo', '@Moabudu', '@kemiadetiba', '@ayorotimi', '@MI_Abaga', '@ritaUdominic', '@obyezeks', '@seunosewa', '@OsaGz', 'Share', 'Tweet', 'Subscribe', 'Book Hotels']


In [18]:
for i in celeb[48:]:
    celeb.remove(i)
print(celeb)
cel = celeb

['@DONJAZZY', '@Elrufai', '@UtomiPat', '@JuliusAgwu1', '@ALIBABAGCFR', '@AYCOMEDIAN', '@omojuwa', '@Obiasika', '@Nedunaija', '@OloriSupergal', '@Gidi_Traffic', '@Fly_Naija', '@basket_mouth', '@PeterPsquare', '@BankyW', '@toyosirise', '@toluogunlesi', '@eggheader', '@Realseunkuti', '@TiwaSavage', '@wandecoal', '@lindaikeji', '@kunleafolayan', '@feladurotoye', '@ikhide', '@tejucole', '@Realomosexy', '@darey', '@bellanaija', '@AbangMercy', '@iamdbanj', '@Itweetnaija', '@9ja_olofofo', '@DEMOLAEXPOZE', '@GbemiOO', '@seunfakze', '@blcompere', '@funkeakindele', '@abinibi', '@uchejombo', '@Moabudu', '@kemiadetiba', '@ayorotimi', '@MI_Abaga', '@ritaUdominic', '@obyezeks', '@seunosewa', '@OsaGz']


In [20]:
hha = ['@mbuhari', '@OgbeniDipo', '@segalink', '@Alex_Houseof308', '@atiku', '@davido', '@renoomokri', '@_valkiing']
jour = ['@osasuo', '@Murtalaibin', '@omaakatugba', '@A_Salkida', '@fisayosoyombo', '@KemiOlunloyo', '@WSoyinkaCentre', '@SaharaReporters']
pol = ['@NGRSenate','@nassnigeria','@SPNigeria','@HouseNGR','@DrAhmadLawan','@ShehuSani','@hadisirika','@dino_melaye','@DrSRJ']

In [22]:
t_list = cel+hha+jour+pol
print(t_list)

['@DONJAZZY', '@Elrufai', '@UtomiPat', '@JuliusAgwu1', '@ALIBABAGCFR', '@AYCOMEDIAN', '@omojuwa', '@Obiasika', '@Nedunaija', '@OloriSupergal', '@Gidi_Traffic', '@Fly_Naija', '@basket_mouth', '@PeterPsquare', '@BankyW', '@toyosirise', '@toluogunlesi', '@eggheader', '@Realseunkuti', '@TiwaSavage', '@wandecoal', '@lindaikeji', '@kunleafolayan', '@feladurotoye', '@ikhide', '@tejucole', '@Realomosexy', '@darey', '@bellanaija', '@AbangMercy', '@iamdbanj', '@Itweetnaija', '@9ja_olofofo', '@DEMOLAEXPOZE', '@GbemiOO', '@seunfakze', '@blcompere', '@funkeakindele', '@abinibi', '@uchejombo', '@Moabudu', '@kemiadetiba', '@ayorotimi', '@MI_Abaga', '@ritaUdominic', '@obyezeks', '@seunosewa', '@OsaGz', '@mbuhari', '@OgbeniDipo', '@segalink', '@Alex_Houseof308', '@atiku', '@davido', '@renoomokri', '@_valkiing', '@osasuo', '@Murtalaibin', '@omaakatugba', '@A_Salkida', '@fisayosoyombo', '@KemiOlunloyo', '@WSoyinkaCentre', '@SaharaReporters', '@NGRSenate', '@nassnigeria', '@SPNigeria', '@HouseNGR', '@DrAh

In [23]:
#!pip install tweepy
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

In [24]:
consumer_key = "K8W2HpecQElw6KgwuVHeo2yam"
consumer_secret = "nefpW8qOYBcWxvW31FbnctvfhT7JdxbAomapRsfflDuTMyjbR6"
access_token = "994261917253472256-9Pr2nCD3YbPSS7spfTIM2azickdWaOd"
access_token_secret = "g4k7BxSZKaPncIExbAAz6YUOrjl8Cq5NNcpLq8axpbsUp"

In [25]:
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

In [26]:
# getting only 30 followers
all_followers = []
for i in t_list:
    try:
        for follower in tweepy.Cursor(api.followers, screen_name=i).items(10): 
            all_followers.append(follower.screen_name)
    except tweepy.TweepError as e:
        continue
#print(all_followers)



         screen_name
0     Akachi78159533
1     Akinlade_Afees
2    UsamaAbubakarB3
3           aniboy_e
4     agham_jennifer
5            QuahmeV
6            XtraAmo
7            airauhi
8    Kingsleyarrang1
9       De_dreamer85
10       Bellomanzo1
11         Marsh2023
12   FoundationBegi1
13         AhmaduJam
14       SanjoAbioye
15   IbrahimAbbaIsh4
16   GamboAyubaPaul2
17      JafarSalisu9
18   Abubaka58629031
19            etsuyi
20   FadojuOlugbeng1
21         Marsh2023
22          Tolu_GGA
23           sonypez
24       TaiwoMajeks
25        TomWrights
26   ArticulateMedi1
27    saxmatikzmusic
28          IndiaUmu
29   chinonsoAntho18
..               ...
640  GlamourProgress
641    Larry05533452
642   Teepha02818064
643         Mogaji_1
644  oladunni_lasisi
645  SHARAFA96420181
646  SensationalShu3
647           wojuRA
648  Ebiniyioluwase2
649    David19024527
650    Planepilot285
651  benchmark_waliu
652       NuraBaffa9
653    MaradunMakama
654         ugodean1
655  GONIALIM

In [28]:
followers=pd.DataFrame({'screen_name':all_followers})
followers.to_csv(r"C:\Users\HP\Desktop\CV, P.Statement and others\10 Academy\week2_followers.csv",index = False, header=True)
print(followers)

         screen_name
0     Akachi78159533
1     Akinlade_Afees
2    UsamaAbubakarB3
3           aniboy_e
4     agham_jennifer
5            QuahmeV
6            XtraAmo
7            airauhi
8    Kingsleyarrang1
9       De_dreamer85
10       Bellomanzo1
11         Marsh2023
12   FoundationBegi1
13         AhmaduJam
14       SanjoAbioye
15   IbrahimAbbaIsh4
16   GamboAyubaPaul2
17      JafarSalisu9
18   Abubaka58629031
19            etsuyi
20   FadojuOlugbeng1
21         Marsh2023
22          Tolu_GGA
23           sonypez
24       TaiwoMajeks
25        TomWrights
26   ArticulateMedi1
27    saxmatikzmusic
28          IndiaUmu
29   chinonsoAntho18
..               ...
640  GlamourProgress
641    Larry05533452
642   Teepha02818064
643         Mogaji_1
644  oladunni_lasisi
645  SHARAFA96420181
646  SensationalShu3
647           wojuRA
648  Ebiniyioluwase2
649    David19024527
650    Planepilot285
651  benchmark_waliu
652       NuraBaffa9
653    MaradunMakama
654         ugodean1
655  GONIALIM

In [None]:
screen_name = ['Paul35304891']
tweets=[]
for i in screen_name:
    result = api.user_timeline(i, count=5)
    for tweet in result:
        tweets.append(tweet.text)
print(tweet)
    

In [29]:
#getting tweets
tweets = []
tweetCount=5
for i in all_followers:
    try:
        results=api.user_timeline(id=i, count=tweetCount)
    except tweepy.TweepError as e:
                continue  
    for tweet in results:
        tweets.append(tweet.text)
print(tweets)


