In [None]:
# Install the third-party packages that the import cell of this notebook relies on.
# NOTE(review): no versions are pinned, so a fresh run may pull different releases.
%pip install requests
%pip install beautifulsoup4
%pip install wikipedia-api
%pip install google-api-python-client
%pip install praw
%pip install googlesearch-python
%pip install python-dotenv



In [None]:
import requests
from bs4 import BeautifulSoup
import wikipediaapi
from googleapiclient.discovery import build
import praw
from googlesearch import search
from dotenv import load_dotenv
import os


# Load variables from a .env file (python-dotenv) so the os.getenv() calls in
# the cells below can pick up the API credentials.
load_dotenv()

In [None]:
# 1. *Amazon:* web scraping with BeautifulSoup. | Products, prices, reviews

url = 'https://www.amazon.fr/dp/B0BCFJPLT7'

# Placeholder shown when an expected element is absent from the page
# (instead of crashing with AttributeError on a None lookup result).
MISSING = 'Non disponible'

try:
    # A timeout keeps the cell from hanging forever if the server never answers.
    response = requests.get(url, timeout=10)
except requests.exceptions.RequestException as e:
    print(f'Une erreur est survenue: {e}')
else:
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        title_node = soup.find(id='productTitle')
        title = title_node.get_text(strip=True) if title_node is not None else MISSING

        # 'a-price-whole' only carries the integer part plus the decimal
        # separator (the cell used to print "12, "). The cents are read from
        # the 'a-price-fraction' span that follows it.
        # NOTE(review): assumes Amazon's usual price markup; confirm on the live page.
        whole_node = soup.find('span', {'class': 'a-price-whole'})
        if whole_node is None:
            price = MISSING
        else:
            fraction_node = whole_node.find_next('span', {'class': 'a-price-fraction'})
            price = whole_node.get_text(strip=True)
            if fraction_node is not None:
                price += fraction_node.get_text(strip=True)

        avis_node = soup.find(class_="a-icon-alt")
        avis = avis_node.get_text() if avis_node is not None else MISSING

        print(f'Titre du produit: {title}')
        print(f'Prix du produit: {price} ')
        print(f'Avis sur le produit: {avis}')
    else:
        print('La requête a échoué avec le statut:', response.status_code)


Titre du produit: Chargeur iphone Rapide, certifié Apple MFi 20W Chargeur Rapide USB c pour iphone avec 2m Câble USB C pour Apple iPhone 14/14Pro/13/13 Mini/13 Pro/13 Pro Max/12/12 Pro/12 Pro Max/11/11 Pro/11 Pro Max
Prix du produit: 12, 
Avis sur le produit: 4,2 sur 5 étoiles


In [None]:
# 2. *Twitter:* Twitter API v2. | Tweets, likes, retweets
# The bearer token is read from the environment rather than written in the notebook.
BEARER_TOKEN = os.getenv('BEARER_TOKEN')

SEARCH_URL = "https://api.twitter.com/2/tweets/search/recent"

# Query parameters sent with the search request; the extra tweet fields are
# the ones printed further down.
params = {
    "query": "data science -is:retweet",
    "tweet.fields": "created_at,public_metrics",
    "max_results": 10
}

headers = {
    "Authorization": f"Bearer {BEARER_TOKEN}"
}

try:
    # Same timeout and error handling as the Instagram cell, so a stalled or
    # failed connection produces a message instead of a hang or a traceback.
    response = requests.get(SEARCH_URL, headers=headers, params=params, timeout=10)
except requests.exceptions.RequestException as e:
    print(f"Une erreur est survenue: {e}")
else:
    if response.status_code == 200:
        data = response.json()

        # Display each returned tweet; a missing "data" key means no results.
        for tweet in data.get("data", []):
            print(f"Date : {tweet['created_at']}")
            print(f"Tweet : {tweet['text']}")
            print(f"Likes : {tweet['public_metrics']['like_count']}")
            print(f"Retweets : {tweet['public_metrics']['retweet_count']}")
            print("-" * 50)
    else:
        print(f"Erreur {response.status_code}: {response.text}")


In [None]:
# 3. *Instagram:* Instagram Graph API. | Captions, likes, images
# Token and account id come from the environment (os.getenv yields None when unset).
ACCESS_TOKEN = os.getenv('ACCESS_TOKEN')
INSTAGRAM_ACCOUNT_ID = os.getenv('INSTAGRAM_ACCOUNT_ID')
# Versioned API root used as the prefix of the request URL.
BASE_URL = "https://graph.facebook.com/v22.0"

def get_instagram_posts(limit=5):
    """Request the account's media list from the Graph API and print each post.

    Reads the module-level BASE_URL, INSTAGRAM_ACCOUNT_ID and ACCESS_TOKEN.

    Args:
        limit: Value sent as the ``limit`` query parameter (number of posts requested).

    Returns:
        None. Posts and error messages are printed; exceptions raised by
        ``requests`` are caught and reported here.
    """
    url = f"{BASE_URL}/{INSTAGRAM_ACCOUNT_ID}/media"
    # Let requests build and URL-encode the query string instead of
    # concatenating it by hand into the URL.
    query = {
        "fields": "id,caption,like_count,comments_count,media_url,permalink,timestamp",
        "limit": limit,
        "access_token": ACCESS_TOKEN,
    }

    try:
        response = requests.get(url, params=query, timeout=10)

        if response.status_code == 200:
            data = response.json()
            posts = data.get("data", [])

            for post in posts:
                print(f"Titre: {post.get('caption', 'Aucune légende')}")
                print(f"Likes: {post.get('like_count', 0)}")
                print(f"Commentaires: {post.get('comments_count', 0)}")
                print(f"Lien du média: {post.get('media_url')}")
                print(f"Lien du post: {post.get('permalink')}")
                print(f"Date: {post.get('timestamp')}")
                print('-' * 50)

        else:
            print(f"Erreur: {response.status_code}, {response.text}")

    except requests.exceptions.Timeout:
        print("La requête a pris trop de temps. Vérifie ta connexion Internet ou essaie plus tard.")

    except requests.exceptions.ConnectionError:
        print("Impossible de se connecter. Vérifie ta connexion Internet.")

    except requests.exceptions.RequestException as e:
        # The exception text may embed the request URL, which carries the
        # access token; mask it so the secret is not saved in notebook output.
        message = str(e)
        if ACCESS_TOKEN:
            message = message.replace(ACCESS_TOKEN, "***")
        print(f"Une erreur est survenue: {message}")

# Run the function with its default limit
get_instagram_posts()

In [None]:
# 4. *YouTube:* YouTube Data API. | Titles, views, comments
# API key and the id of the video to inspect are both read from the environment.
API_KEY = os.getenv('API_KEY')
VIDEO_ID = os.getenv('VIDEO_ID')

def get_video_details(video_id):
    """Look up a video's title, view count and comment count via the YouTube Data API v3.

    Builds a client with the module-level API_KEY and requests the
    ``snippet`` and ``statistics`` parts for ``video_id``.

    Args:
        video_id: Identifier of the video to query.

    Returns:
        A dict with keys ``'title'``, ``'comment_count'`` and ``'views'``, or
        None when the response contains no items. A counter that the API
        omits is returned as None instead of raising KeyError.
    """
    youtube = build('youtube', 'v3', developerKey=API_KEY)

    response = youtube.videos().list(
        part='snippet,statistics',
        id=video_id
    ).execute()

    items = response.get('items')
    if not items:
        return None

    video = items[0]
    # NOTE(review): individual statistics can be missing from the payload
    # (reportedly e.g. commentCount when comments are disabled), so read them
    # defensively rather than indexing.
    statistics = video.get('statistics', {})

    return {
        'title': video['snippet']['title'],
        'comment_count': statistics.get('commentCount'),
        'views': statistics.get('viewCount'),
    }
    
# Query the configured video and report either its details or a "nothing found" notice.
video_details = get_video_details(VIDEO_ID)
if not video_details:
    print("Aucune donnée trouvée pour cette vidéo.")
else:
    print(f"Title: {video_details['title']}")
    print(f"comment_count: {video_details['comment_count']}")
    print(f"Views: {video_details['views']}")


Title: How to Get YouTube API Key 2024 | Create YouTube API Key ( YouTube Data API v3 )
comment_count: 132
Views: 106595


In [None]:
# 5. *Google Search:* scraping with googlesearch. | Search results
# NOTE(review): `search` is already imported in the notebook's import cell; this line repeats it.
from googlesearch import search

def google_search(query, num_results=10):
    """Print the results returned by ``search`` for ``query``, numbered from 1.

    Args:
        query: Search terms forwarded to ``search``.
        num_results: Forwarded to ``search`` as its ``num_results`` argument.

    Returns:
        None. Any exception raised while searching or iterating is caught and
        reported as a printed message.
    """
    try:
        for rank, link in enumerate(search(query, num_results=num_results), start=1):
            print(f"Result {rank}: {link}")
    except Exception as err:
        print(f"Une erreur s'est produite: {err}")


# Example query; num_results is left at the function's default of 10.
query = "sara adjaho"
google_search(query)


Result 1: https://www.instagram.com/sara_adjaho/
Result 2: https://bj.linkedin.com/in/sara-odile-adjaho-b47399259
Result 3: https://www.facebook.com/sara.adjaho.58/
Result 4: https://www.facebook.com/Togozik/videos/la-talentueuse-sara-adjaho-sara_adjaho-a-immortalis%C3%A9-le-concert-historique-de-sa/1608590310006912/
Result 5: https://www.tiktok.com/@sara.adjaho
Result 6: https://genius.com/artists/Sara-adjaho
Result 7: https://www.linkedin.com/posts/sara-odile-adjaho-b47399259_activity-7240791877841555456-Wo0i
Result 8: https://www.instagram.com/ziktogo/reel/DB43tmTskBK/?locale=ne_NP&hl=af
Result 9: https://www.instagram.com/sedonnogniofficial/?locale=fr_CA&hl=af
Result 10: https://www.tiktok.com/@sara.adjaho/video/7272090568770047264


In [None]:
# 6. *Reddit:* Reddit API (PRAW). | Posts, votes, comments

subreddit_name = 'python'

# Client id and secret are taken from the environment.
reddit = praw.Reddit(
    client_id=os.getenv('CLIENT_ID'),
    client_secret=os.getenv('CLIENT_SECRET'),
    user_agent='Data science',
)
subreddit = reddit.subreddit(subreddit_name)

# Walk the first five "hot" submissions and show up to five comments for each.
for submission in subreddit.hot(limit=5):
    header_lines = (
        f'Titre: {submission.title}',
        f'Votes: {submission.score}',
        f'Lien: {submission.url}',
        'Commentaires:',
    )
    print(*header_lines, sep='\n')

    # The sort order is set before the comments are first accessed.
    submission.comment_sort = 'top'
    comment_tree = submission.comments
    comment_tree.replace_more(limit=0)

    for comment in comment_tree.list()[:5]:
        print(f'  - {comment.body} (Votes: {comment.score})')

    print('-' * 50)

Titre: Sunday Daily Thread: What's everyone working on this week?
Votes: 8
Lien: https://www.reddit.com/r/Python/comments/1jhmdi1/sunday_daily_thread_whats_everyone_working_on/
Commentaires:
  - Currently I’m working on another text based game. I don’t have a name for it but it’s build like an rpg. The UI is in handmade ASCII and is displayed via the console. All you would need is something like VSC to run it. The user has 9 equipment slots. Head, neck 1, neck 2, torso, gloves, pants, boots, main hand, off hand. Each one can have 0-3 affixes applies to it. The affix list for each item varies. Currently I have it sorted into a couple categories. Generic, Armor Generic, and Weapon specific. I’m still working on the gear and affix system. I’m at around 800~ and I’m almost done with the affixes. Still working on all the gear types. (Votes: 2)
  - I'm working on a framework for local/on-prem small-data processing/ETL and warehousing.  

__Background__  
I only deal with a couple thousand ro

In [None]:
# 7. *Wikipedia:* Wikipedia API. | Article content.
def extract_wikipedia(page_title,language='en'):
    """Return the text of a Wikipedia article.

    Args:
        page_title: Title of the page to fetch.
        language: Wikipedia language edition to query (default ``'en'``).

    Returns:
        The page's text, or the string ``"Page '<title>' not found."`` when
        the page does not exist.
    """
    user_agent = "pythonscripte/1.0 (adjahosarahouefa@gmail.com)"
    # Pass both settings by keyword. In current Wikipedia-API releases the
    # first positional parameter is the user agent, so passing `language`
    # positionally left the client on its default language.
    # NOTE(review): the `user_agent` keyword needs Wikipedia-API >= 0.6.
    wiki = wikipediaapi.Wikipedia(user_agent=user_agent, language=language)
    page_wiki = wiki.page(page_title)

    if not page_wiki.exists():
        return f"Page '{page_title}' not found."

    return page_wiki.text
  
# Fetch one article and print what the function returned.
# NOTE(review): no language is passed, so the default 'en' applies even though
# the title uses the French spelling; confirm this is intended.
page_title = "Bénin"
content = extract_wikipedia(page_title)
# NOTE(review): this prints the entire returned text; consider showing an excerpt.
print(content)

