## Real-Time Twitter Data Ingestion Using Delta Live Tables (DLT) Streaming Pipeline

### Flow Diagram of the Project

![](/Volumes/data_gov/data_gov_test/image/tweitter_fow.jpg)

### Integrate with the Twitter API to retrieve data and ingest it into DBFS (Databricks File System)

In [0]:
pip install tweepy

In [0]:
pip install requests


### Extracting the Twitter Timeline Data from Own Account

In [0]:
import requests
import json
import os
import time

# === CONFIGURATION ===
# SECURITY NOTE(review): a bearer token is committed below in plain text.
# Treat it as compromised: rotate it and supply the new value through the
# TWITTER_BEARER_TOKEN environment variable (or a secret store). The literal
# is kept only as a fallback so existing runs keep working unchanged.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN") or "AAAAAAAAAAAAAAAAAAAAAOnN2QEAAAAAiLGMcTeEc2nrfNrx1Zx17%2BUbUJg%3DBSLkOksts3V4MWPYtQqSs5a600S5tatc3LVxXE6ugcr2giwTHj"
USERNAME = 'madimgiri'  # Twitter username to look up (no @)
USER_ID = None  # Set to a known user ID to skip the username lookup
DBFS_PATH = '/Volumes/data_gov/data_gov_test/twitter/my_tweets.json'  # Output JSON file

# === Twitter API Headers ===
# Sent with every request issued through safe_get().
headers = {
    'Authorization': f'Bearer {BEARER_TOKEN}',
    'User-Agent': 'v2UserTimelinePython',
}

# === Safe GET with Rate Limit Retry ===
def safe_get(url, headers, params=None, max_retries=3, wait_seconds=60, timeout=30):
    """Issue a GET request, retrying while the server answers HTTP 429.

    Args:
        url: Target URL.
        headers: HTTP headers to send with the request.
        params: Optional query-string parameters.
        max_retries: Maximum number of requests to attempt.
        wait_seconds: Pause between attempts after a 429 response.
        timeout: Per-request timeout in seconds passed to ``requests.get`` so
            a stalled connection cannot block forever.

    Returns:
        The first response whose status code is not 429 (it may still be an
        error status; callers check ``status_code`` themselves).

    Raises:
        Exception: If every attempt was rate limited, or if ``max_retries``
            allows no attempt at all.
    """
    response = None
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code != 429:
            return response
        # Only wait when another attempt will follow; sleeping after the
        # final attempt would just delay the failure.
        if attempt < max_retries - 1:
            print(f"⚠️ Rate limit hit. Waiting {wait_seconds}s before retrying...")
            time.sleep(wait_seconds)

    if response is None:
        # max_retries <= 0: the loop never ran, so there is no response to report.
        raise Exception(f"❌ Failed after {max_retries} retries: no request was attempted")
    raise Exception(f"❌ Failed after {max_retries} retries: {response.status_code} {response.text}")

# === Get user ID (if not provided manually) ===
def get_user_id(username):
    """Resolve a Twitter username to its user ID.

    Args:
        username: Account handle, interpolated into the
            ``/2/users/by/username/`` endpoint URL.

    Returns:
        The ``id`` value from the ``data`` object of the JSON response.

    Raises:
        Exception: If the response status is not 200, or the body carries no
            ``data.id`` (for example an errors-only payload).
    """
    url = f'https://api.twitter.com/2/users/by/username/{username}'
    response = safe_get(url, headers)
    if response.status_code != 200:
        raise Exception(f"Failed to get user ID: {response.status_code} {response.text}")

    # A 200 response is not guaranteed to contain 'data'; report that case
    # with the response body instead of surfacing a bare KeyError.
    user = response.json().get('data')
    if not user or 'id' not in user:
        raise Exception(f"Failed to get user ID: {response.status_code} {response.text}")
    return user['id']

# === Get recent tweets (Free tier: only up to 10) ===
def get_recent_tweets(user_id, max_results=10):
    """Fetch the most recent tweets from a user's timeline.

    Args:
        user_id: ID of the account whose timeline is requested.
        max_results: Number of tweets to request. Defaults to 10, the limit
            this notebook assumes for the free plan.
            NOTE(review): the endpoint restricts the accepted range; confirm
            the bounds for your access tier before raising this.

    Returns:
        The list under the ``data`` key of the JSON response, or an empty
        list when that key is absent.

    Raises:
        Exception: If the response status is not 200.
    """
    url = f'https://api.twitter.com/2/users/{user_id}/tweets'
    params = {
        'max_results': max_results,
        'tweet.fields': 'created_at,id,text',
    }
    response = safe_get(url, headers, params)
    if response.status_code != 200:
        raise Exception(f"Error fetching tweets: {response.status_code} {response.text}")
    # The code treats a missing 'data' key as "no tweets", not as an error.
    return response.json().get('data', [])

# === Save to DBFS (or local path in Databricks) ===
def save_to_dbfs(tweets, file_path=DBFS_PATH):
    """Write ``tweets`` to ``file_path`` as pretty-printed UTF-8 JSON.

    Args:
        tweets: JSON-serializable object to store.
        file_path: Destination file; any missing parent directories are
            created first. An existing file is overwritten.
    """
    parent_dir = os.path.dirname(file_path)
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create directories when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(tweets, f, ensure_ascii=False, indent=2)

# === Main Script ===
# Resolves the user ID, fetches the recent tweets, and writes them to DBFS_PATH.
if __name__ == '__main__':
    try:
        # Use the manually configured ID when set; otherwise look it up by username.
        user_id = USER_ID or get_user_id(USERNAME)
        print(f"✅ Twitter User ID: {user_id}")
        tweets = get_recent_tweets(user_id)
        save_to_dbfs(tweets)
        print(f"✅ Saved {len(tweets)} tweet(s) to {DBFS_PATH}")
    except Exception as e:
        # Top-level boundary: any failure above is reported as a message
        # rather than propagated.
        print(f"❌ Error: {e}")


In [0]:
import requests

# Standalone check: look up the numeric user ID for USERNAME and print it.
import os

# Replace with your actual Bearer Token and Twitter username (no @)
# SECURITY NOTE(review): a bearer token is committed below in plain text.
# Treat it as compromised: rotate it and supply the new value through the
# TWITTER_BEARER_TOKEN environment variable (or a secret store). The literal
# is kept only as a fallback so existing runs keep working unchanged.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN") or "AAAAAAAAAAAAAAAAAAAAAOnN2QEAAAAAiLGMcTeEc2nrfNrx1Zx17%2BUbUJg%3DBSLkOksts3V4MWPYtQqSs5a600S5tatc3LVxXE6ugcr2giwTHj"
USERNAME = 'madimgiri'

url = f'https://api.twitter.com/2/users/by/username/{USERNAME}'
headers = {
    'Authorization': f'Bearer {BEARER_TOKEN}',
    'User-Agent': 'GetUserID',
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)

if response.status_code == 200:
    user_data = response.json()
    # A 200 response is not guaranteed to contain 'data'; report that case
    # instead of failing with a KeyError.
    if 'data' in user_data and 'id' in user_data['data']:
        user_id = user_data['data']['id']
        print(f"✅ Your Twitter User ID is: {user_id}")
    else:
        print(f"❌ Failed to get user ID: {response.status_code} {response.text}")
else:
    print(f"❌ Failed to get user ID: {response.status_code} {response.text}")
