# **Twitter**

## **Save to CSV**

In [None]:
import pandas as pd

def initialize_csv(csv_file='0224.csv'):
    """Write a header-only CSV file to ``csv_file``.

    The file receives a single header row naming the tweet fields and no
    data rows. A confirmation line is printed once the file is written.

    Args:
        csv_file: Destination path for the CSV file.
    """
    # Field names grouped by what they describe; the concatenation order
    # below is the column order of the header row.
    post_fields = ["id", "conversation_id", "post_type", "created_time",
                   "timestamp", "source"]
    count_fields = ["favorite_count", "retweet_count", "reply_count",
                    "quote_count", "view_count"]
    text_fields = ["text", "text_lang", "text_tagged_users"]
    author_fields = ["author_id", "author_username"]
    reply_fields = ["in_reply_to_post_id", "in_reply_to_profile_id",
                    "in_reply_to_profile_username"]
    quote_fields = ["quoted_status_id", "quoted_status_profile_id",
                    "quoted_status_profile_username"]

    header = (post_fields + count_fields + text_fields
              + author_fields + reply_fields + quote_fields)

    # A frame with columns but no rows serialises to just the header line.
    pd.DataFrame(columns=header).to_csv(csv_file, index=False)
    print(f"Initialized {csv_file} with column headers")


# Write the header-only CSV at the default path ('0224.csv'). Note that
# re-running this cell rewrites that file, replacing whatever it contained.
initialize_csv()

Initialized 0224.csv with column headers


## **Get Data**

In [None]:
import requests
import json
import time
import pandas as pd
from datetime import datetime

def get_all_search_results(keywords, access_token, task_id, max_attempts=12,
                           retry_delay=15, to_date='2023-09-27T13:38:35',
                           timeout=30):
    """Poll the search-results endpoint until it returns tweets.

    Sends a GET request to the results URL up to ``max_attempts`` times.
    The first response with HTTP 200, ``status == 'ok'`` and a non-empty
    ``data.items`` list ends the polling. Any other outcome (non-200
    status, empty or missing items, an exception while requesting or
    parsing) is logged and retried after ``retry_delay`` seconds.

    Args:
        keywords: Search keywords sent as the ``keywords`` query parameter.
        access_token: API token sent as the ``access_token`` query parameter.
        task_id: Identifier of the search task whose results are fetched.
        max_attempts: Maximum number of requests before giving up.
        retry_delay: Seconds to sleep between attempts.
        to_date: Value of the ``to_date`` query parameter.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot block the polling loop indefinitely.

    Returns:
        The list of items from the first successful response, or an empty
        list if every attempt failed.
    """
    results_url = "https://api.data365.co/v1.1/twitter/search/post/posts"
    max_log_chars = 500  # cap the debug echo so a full page of tweets does not flood the output

    params = {
        'access_token': access_token,
        'task_id': task_id,
        'keywords': keywords,
        'max_page_size': 500,
        'lang': 'en',
        'to_date': to_date,
    }

    for attempt in range(max_attempts):
        try:
            print(f"Attempt {attempt + 1}/{max_attempts} to get results...")
            response = requests.get(results_url, params=params, timeout=timeout)

            # Debug response (body truncated)
            print(f"Response status code: {response.status_code}")
            body = response.text
            suffix = "..." if len(body) > max_log_chars else ""
            print(f"Response content: {body[:max_log_chars]}{suffix}")

            if response.status_code != 200:
                print(f"Error status code: {response.status_code}")
            else:
                payload = response.json()
                # Failed responses carry "data": null, so neither level can
                # be assumed to be a dict.
                data_section = payload.get('data') if isinstance(payload, dict) else None
                items = data_section.get('items') if isinstance(data_section, dict) else None

                if isinstance(payload, dict) and payload.get('status') == 'ok' and items:
                    tweets = list(items)
                    print(f"Retrieved {len(tweets)} tweets.")
                    return tweets

                print("No tweets found in response.")

        except Exception as e:
            # Deliberately broad: polling is best-effort and any failure
            # (network error, invalid JSON, ...) should lead to a retry.
            print(f"Error: {e}")

        # No point sleeping once the attempts are exhausted.
        if attempt + 1 < max_attempts:
            print(f"Waiting {retry_delay} seconds before retry...")
            time.sleep(retry_delay)

    return []

def append_tweets_to_csv(json_data, csv_file='0224.csv'):
    """Merge tweets into ``csv_file``, keeping one row per tweet id.

    New rows are appended to the rows already in the file; when the same
    ``id`` appears more than once the most recently appended row wins. If
    the file does not exist (or is completely empty) it is created.

    Identifier fidelity: the existing file is read back as text rather than
    letting pandas infer numeric dtypes, and new rows are kept as Python
    objects. Otherwise an ``id`` read back as an integer would never match
    a string ``id`` on de-duplication, and id columns containing blanks
    would be converted to floats and lose digits on large ids.

    Args:
        json_data: A single tweet dict or an iterable of tweet dicts.
        csv_file: Path of the CSV file to update.

    Raises:
        KeyError: If a tweet lacks one of the required fields.
    """
    if isinstance(json_data, dict):
        tweets = [json_data]
    else:
        tweets = list(json_data or [])

    columns = [
        "id", "conversation_id", "post_type", "created_time", "timestamp",
        "source", "favorite_count", "retweet_count", "reply_count",
        "quote_count", "view_count", "text", "text_lang", "text_tagged_users",
        "author_id", "author_username", "in_reply_to_post_id",
        "in_reply_to_profile_id", "in_reply_to_profile_username",
        "quoted_status_id", "quoted_status_profile_id",
        "quoted_status_profile_username",
    ]
    # Fields that may be absent from a tweet; absent values are stored as None.
    optional_columns = {
        "view_count", "in_reply_to_post_id", "in_reply_to_profile_id",
        "in_reply_to_profile_username", "quoted_status_id",
        "quoted_status_profile_id", "quoted_status_profile_username",
    }

    tweets_data = []
    for tweet in tweets:
        row = {}
        for column in columns:
            if column == "text_tagged_users":
                row[column] = tweet.get(column, [])
            elif column in optional_columns:
                row[column] = tweet.get(column)
            else:
                row[column] = tweet[column]
        tweets_data.append(row)

    # Explicit columns keep the header intact even for an empty batch;
    # dtype=object stops pandas turning id columns with gaps into floats.
    df = pd.DataFrame(tweets_data, columns=columns, dtype=object)
    # Compare ids as text so "123" from the API matches 123 from the file.
    df["id"] = df["id"].astype(str)

    try:
        # keep_default_na=False keeps blank cells as '' and keeps literal
        # texts such as "NA" or "null" from being read as missing values.
        existing_df = pd.read_csv(csv_file, dtype=str, keep_default_na=False)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No usable file yet: create it from the new rows alone.
        df.to_csv(csv_file, index=False)
        print(f"Created new file {csv_file} with {len(df)} tweets")
        return

    combined_df = pd.concat([existing_df, df], ignore_index=True)
    # Remove duplicates based on tweet ID, preferring the newest row
    combined_df = combined_df.drop_duplicates(subset=['id'], keep='last')
    combined_df.to_csv(csv_file, index=False)
    print(f"Successfully appended {len(df)} tweets to {csv_file}")

def save_tweets(tweets, filename):
    """Write a reduced-field copy of each tweet to ``filename`` as JSON.

    Each output record keeps only the id, creation time, text, author
    username, engagement counts and text language of the tweet. The file is
    written as UTF-8 with non-ASCII characters left unescaped.

    ``view_count`` is treated as optional and stored as ``None`` when a
    tweet does not carry it; every other field is required.

    Args:
        tweets: Iterable of tweet dicts.
        filename: Path of the JSON file to write.

    Returns:
        The list of simplified tweet dicts that was written.

    Raises:
        KeyError: If a tweet lacks one of the required fields.
    """
    simplified_tweets = []
    for tweet in tweets:
        simplified_tweets.append({
            'id': tweet['id'],
            'created_time': tweet['created_time'],
            'text': tweet['text'],
            'author_username': tweet['author_username'],
            'favorite_count': tweet['favorite_count'],
            'retweet_count': tweet['retweet_count'],
            'reply_count': tweet['reply_count'],
            # Not every tweet has a view count; do not fail the whole export.
            'view_count': tweet.get('view_count'),
            'text_lang': tweet['text_lang']
        })

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(simplified_tweets, f, indent=2, ensure_ascii=False)

    return simplified_tweets

def main():
    """Run one keyword search end to end and persist the results.

    Steps:
      1. Start a search task for the keyword via the search "update" endpoint.
      2. Wait a fixed period for the task to be processed.
      3. Poll for the results with ``get_all_search_results``.
      4. Save the raw tweets as JSON, merge them into the CSV via
         ``append_tweets_to_csv`` and write a simplified JSON via
         ``save_tweets``.

    The API token is read from the ``DATA365_ACCESS_TOKEN`` environment
    variable so that it is never stored in the notebook. The function
    prints a message and returns early when the token is missing, when the
    search could not be started, or when no tweets were retrieved.
    """
    import os  # local import: only this function needs the environment

    access_token = os.environ.get("DATA365_ACCESS_TOKEN")
    if not access_token:
        print("Error: set the DATA365_ACCESS_TOKEN environment variable to your API access token")
        return

    keyword = "rbc wealth management"
    initial_wait_seconds = 700  # time given to the search task before polling
    request_timeout = 30        # seconds; avoids hanging on a stalled connection

    print(f"\n{'='*50}")
    print(f"Processing keyword: {keyword}")
    print(f"{'='*50}\n")

    # STEP 1: Initiate the search
    search_url = "https://api.data365.co/v1.1/twitter/search/post/update"
    search_params = {
        'keywords': keyword,
        'search_type': 'latest',
        'max_posts': 500,
        'access_token': access_token,
        'lang': 'en',
        'to_date': '2023-09-27T13:38:35'
    }

    print(f"Initiating search for: {keyword}")
    search_response = requests.post(search_url, params=search_params,
                                    timeout=request_timeout)

    print(f"Search response: {search_response.text}")

    try:
        response_data = search_response.json()
    except ValueError:
        print(f"Error: Search response was not valid JSON for keyword: {keyword}")
        return

    # A failed request returns "data": null, so neither level can be
    # assumed to be a dict.
    data_section = response_data.get('data') if isinstance(response_data, dict) else None
    task_id = data_section.get('task_id') if isinstance(data_section, dict) else None

    if not task_id:
        print(f"Error: Could not find task_id in response for keyword: {keyword}")
        return  # nothing to poll for; do not wait or fetch

    print(f"Search initiated with task_id: {task_id}")

    # STEP 2: Wait before fetching results
    print(f"Waiting {initial_wait_seconds} seconds before fetching results...")
    time.sleep(initial_wait_seconds)

    # STEP 3: Get results
    print(f"Fetching results for task_id: {task_id}")
    tweets = get_all_search_results(keyword, access_token, task_id)

    if not tweets:
        print(f"No tweets were retrieved for keyword: {keyword}")
        return  # avoid writing empty output files

    # Create timestamp for filenames
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    keyword_slug = keyword.replace(" ", "_")

    # Save full response with keyword in filename
    full_filename = f'full_tweets_{keyword_slug}_{timestamp}.json'
    with open(full_filename, 'w', encoding='utf-8') as f:
        json.dump(tweets, f, indent=2, ensure_ascii=False)

    # Append to CSV straight from memory; re-reading the JSON file that was
    # just written adds nothing and depended on the platform's default encoding.
    append_tweets_to_csv(tweets)

    # Save simplified version
    simplified_filename = f'simplified_tweets_{keyword_slug}_{timestamp}.json'
    save_tweets(tweets, simplified_filename)

    print(f"\nRetrieved {len(tweets)} tweets for keyword: {keyword}")
    print(f"Saved to '{full_filename}' and '{simplified_filename}'")

# Entry point: call main() only when this code runs as the main program
# rather than being imported. In a notebook kernel __name__ is "__main__",
# so executing this cell starts the search.
if __name__ == "__main__":
    main()



Processing keyword: rbc wealth management

Initiating search for: rbc wealth management
Search response: {"data":null,"error":{"code":"AuthorizationFailed","message":"You have no access to this social network API","target":"access_token","value":null},"status":"fail"}



AttributeError: 'NoneType' object has no attribute 'get'