Python code that fetches tweets from Twitter using Essential access and stores them in AWS S3. The script also includes code to fetch the data back from S3 and load it into a pandas DataFrame.


In [None]:
!pip install --upgrade tweepy 

In [None]:
!pip install --upgrade boto3

In [None]:
import tweepy
import csv
import boto3
import pandas as pd
import io


# Tweet attributes requested from the Twitter API for every returned tweet.
TWEET_FIELDS = [
    'id',
    'text',
    'created_at'
]

# AWS credentials
# NOTE(review): placeholders only -- avoid committing real keys to the notebook.
access_key = "*******************"
secret_key = "*******************"


# Twitter API credentials
BEARER_TOKEN: str = "*******************"

query = "machine+learning" #specify any query string

CLIENT = tweepy.Client(bearer_token=BEARER_TOKEN)

# One search request asking for at most 100 tweets matching the query.
response = CLIENT.search_recent_tweets(query=query,tweet_fields=TWEET_FIELDS,max_results=100)

# `response.data` is None (not an empty list) when no tweet matches the query,
# so fall back to an empty list instead of failing on iteration.
# Each element of `tweets` is the raw dict payload of one tweet.
tweets = [tweet.data for tweet in (response.data or [])]

# The local CSV is named after the search query.
filename = f'{query}_tweets.csv'

# Create a CSV file to store the tweets.
# - encoding='utf-8': tweet text routinely contains non-ASCII characters, and
#   the file is decoded as UTF-8 when it is read back later, so do not rely on
#   the platform default encoding.
# - newline='': lets the csv writer control line endings itself, so newlines
#   embedded in tweet text are written unchanged on every platform.
with open(filename, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file, lineterminator = "\n")
    writer.writerow(['Keyword', 'tweet_id', 'created_at','text'])

    # One row per tweet, tagged with the query that produced it.
    writer.writerows(
        [query, tweet['id'], tweet['created_at'], tweet['text']]
        for tweet in tweets
    )

print("***************** Tweets added in CSV file **************************")


# Destination bucket; the object key reuses the local CSV file name.
bucket_name = 'testbucketkrutik'
key = filename

# Build the S3 client from the explicit key pair defined above.
aws_auth = {
    'aws_access_key_id': access_key,
    'aws_secret_access_key': secret_key,
}
s3 = boto3.client('s3', **aws_auth)


# ---- Upload the CSV file to S3 ----
s3.upload_file(Filename=filename, Bucket=bucket_name, Key=key)
print("**************** CSV file uploaded to AWS S3 *******************")


print("**************** Reading CSV file from AWS S3 ******************")


# Download the object stored under `key` from the bucket.
response = s3.get_object(Bucket=bucket_name, Key=key)

# Pull the HTTP status code out of the response metadata (None if absent).
response_metadata = response.get("ResponseMetadata", {})
status = response_metadata.get("HTTPStatusCode")

if status != 200:
    print(f"************ Unsuccessful S3 get_object response. Status - {status} ***********************")
else:
    print(f"************** Successful S3 get_object response. Status - {status } ***********************")
    # Read the whole object body into memory and parse it as UTF-8 CSV.
    csv_bytes = response["Body"].read()
    df = pd.read_csv(io.BytesIO(csv_bytes), encoding='utf8')
    print(df.head())


***************** Tweets added in CSV file **************************
**************** CSV file uploaded to AWS S3 *******************
**************** Reading CSV file from AWS S3 ******************
************** Successful S3 get_object response. Status - 200 ***********************
            Keyword             tweet_id                created_at  \
0  machine+learning  1632926881032073217  2023-03-07T02:11:21.000Z   
1  machine+learning  1632926840619663361  2023-03-07T02:11:11.000Z   
2  machine+learning  1632926829525999616  2023-03-07T02:11:09.000Z   
3  machine+learning  1632926745174196224  2023-03-07T02:10:49.000Z   
4  machine+learning  1632926740853973000  2023-03-07T02:10:47.000Z   

                                                text  
0  List of my Favorite Indicators so far \n\n1. L...  
1  New study shows how machine learning can impro...  
2  RT @ONPASSIVE: Synthetic data plays a crucial ...  
3  Introduction to Machine Learning and it’s type...  
4  Machine Learni

In [None]:
# Show the leading rows of the DataFrame loaded from S3 in the previous cell
# (only defined if that cell's get_object call returned status 200).
df.head()

Unnamed: 0,Keyword,tweet_id,created_at,text
0,machine+learning,1632926881032073217,2023-03-07T02:11:21.000Z,List of my Favorite Indicators so far \n\n1. L...
1,machine+learning,1632926840619663361,2023-03-07T02:11:11.000Z,New study shows how machine learning can impro...
2,machine+learning,1632926829525999616,2023-03-07T02:11:09.000Z,RT @ONPASSIVE: Synthetic data plays a crucial ...
3,machine+learning,1632926745174196224,2023-03-07T02:10:49.000Z,Introduction to Machine Learning and it’s type...
4,machine+learning,1632926740853973000,2023-03-07T02:10:47.000Z,Machine Learning itu Apasih?\nhttps://t.co/ZoU...
