# Requesting API Keys and Reading Documentation to see Endpoints

### Example 1: Alphavantage.co stock API

They group their data into five categories: <br />
(1) Core Time Series Stock Data APIs,<br /> (2) Fundamental Data,<br /> (3) Physical and Digital/Crypto Currencies (e.g., Bitcoin),<br /> (4) Economic Indicators, and<br />  (5) Technical Indicators.

#### Endpoint: TIME_SERIES_INTRADAY

Returns the most recent 1-2 months of intraday data for short-term and medium-term charting and trading strategy development. API parameters: <br />
❚ **Required: function**

The time series of your choice. In this case, *function=TIME_SERIES_INTRADAY*

❚ **Required: symbol**

The name of the equity of your choice. For example: *symbol=IBM*

❚ **Required: interval**

Time interval between two consecutive data points in the time series. The following values are supported: *1min, 5min, 15min, 30min, 60min*



In [1]:
## TIME_SERIES_INTRADAY Endpoint:
# Downloads 1-minute intraday bars for `symbol` and parses the JSON body
# into `data`.
import os

import requests

# The API key is read from the environment so that it is never stored in the
# notebook. Request a free key at https://www.alphavantage.co/support/#api-key
# and export it as ALPHAVANTAGE_API_KEY before starting Jupyter.
api_key = os.environ['ALPHAVANTAGE_API_KEY']

symbol = 'TSLA'
url = 'https://www.alphavantage.co/query'
params = {
    'function': 'TIME_SERIES_INTRADAY',
    'symbol': symbol,
    'interval': '1min',
    'apikey': api_key,
}

# Let requests build and URL-encode the query string; fail fast on HTTP errors.
r = requests.get(url, params=params, timeout=30)
r.raise_for_status()
data = r.json()

# The cells below read data['Time Series (1min)']; stop here with the raw
# payload in the message if the API answered with something else.
if 'Time Series (1min)' not in data:
    raise RuntimeError('Unexpected Alpha Vantage response: ' + str(data))

In [3]:
import pandas as pd

# Flatten the intraday JSON into one row per timestamp.
# The rows are collected first and the DataFrame is built once: growing a
# DataFrame cell by cell with df.loc re-allocates on every new row.
meta = data['Meta Data']
series = data['Time Series (1min)']

records = [
    {
        'ticker': meta['2. Symbol'],
        'interval': meta['4. Interval'],
        'timestamp': timestamp,
        'open': bar['1. open'],
        'high': bar['2. high'],
        'low': bar['3. low'],
        'close': bar['4. close'],
        'volume': bar['5. volume'],
    }
    for timestamp, bar in series.items()
]

# Explicit column list keeps the column order stable even when `series` is empty.
df = pd.DataFrame(
    records,
    columns=['ticker', 'interval', 'timestamp',
             'open', 'high', 'low', 'close', 'volume'],
)
    

# Amazon S3 (Simple Storage Service) Storage

In [92]:
#run first time (in case you do not have boto3 installed)
!pip install boto3 --quiet

First you need to create an Amazon AWS account. Once you have created your account, search for the S3 storage service. In the upper right corner, under your profile name, click ***Security Credentials***, then open the drop-down that says ***Access Keys***, click on ***Create New Key***, and copy your AWS_KEY and AWS_SECRET; you will need them to run the cells below.

In [4]:
import boto3
import json
from io import StringIO

import os

# Credentials are read from the environment so they are never stored in the
# notebook. When a variable is unset the value is None and boto3 falls back
# to its default credential chain (shared credentials file, IAM role, ...).
# NOTE: an access key that was ever committed in a notebook must be
# deactivated and rotated in the AWS console.
AWS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET = os.environ.get("AWS_SECRET_ACCESS_KEY")
REGION = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")

### Creating an S3 bucket from script

In [95]:
# Low-level S3 client bound to the configured region.
s3 = boto3.client('s3', region_name=REGION,
                  aws_access_key_id=AWS_KEY,
                  aws_secret_access_key=AWS_SECRET)

# S3 only accepts a bare create_bucket call in us-east-1; every other region
# needs an explicit LocationConstraint.
create_kwargs = {'Bucket': 'bigdatawm327'}
if REGION != 'us-east-1':
    create_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': REGION}

# Last expression of the cell, so the notebook displays the service response.
s3.create_bucket(**create_kwargs)

{'ResponseMetadata': {'RequestId': 'KM486KVT8GN046K7',
  'HostId': 'Vcbra/gxATtoIv9ELp+GqX8YyTiOWkS6j0VLh8/YGqZ+78snJWcFTF+hs5037e2eVWW5ccsdZPM=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'Vcbra/gxATtoIv9ELp+GqX8YyTiOWkS6j0VLh8/YGqZ+78snJWcFTF+hs5037e2eVWW5ccsdZPM=',
   'x-amz-request-id': 'KM486KVT8GN046K7',
   'date': 'Sun, 27 Mar 2022 22:06:22 GMT',
   'location': '/bigdatawm327',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'Location': '/bigdatawm327'}

### List existing S3 Buckets

In [6]:
# Build an S3 client from the notebook-level credentials.
s3 = boto3.client(
    's3',
    aws_access_key_id=AWS_KEY,
    aws_secret_access_key=AWS_SECRET,
)

# Ask S3 for the account's buckets and keep only their names.
response = s3.list_buckets()
buckets = [entry['Name'] for entry in response['Buckets']]

### Saving into an Existing S3 bucket

In [20]:
# We use the symbol name and today's date as part of the filename.
from datetime import date

today = date.today()
filename = symbol+'_'+str(today)+'.csv'

# Print only after `filename` has been assigned; printing it first raises a
# NameError when the notebook is run top to bottom on a fresh kernel.
print("Today's date:", today, 'filename: ', filename)

# Local copy on disk.
df.to_csv(filename, index = False, encoding='utf-8-sig')

# In-memory copy whose text is uploaded to S3 in a later cell.
# NOTE(review): pandas documents `encoding` as unsupported for non-binary
# buffers, so the BOM is probably only written to the local file - confirm.
csv_buffer = StringIO()
df.to_csv(csv_buffer, index = False, encoding='utf-8-sig')

Today's date: 2022-03-27 filename:  TSLA_2022-03-27.csv


In [19]:
from io import StringIO

# Destination bucket for the upload.
bucket = 'bigdatawm327'

# boto3's high-level, object-oriented interface to S3.
s3 = boto3.resource(
    's3',
    aws_access_key_id=AWS_KEY,
    aws_secret_access_key=AWS_SECRET,
)

# Upload the CSV text held in the in-memory buffer under `filename`
# (overwrites). Last expression of the cell, so the response is displayed.
s3.Object(bucket, filename).put(
    Body=csv_buffer.getvalue(),
)

{'ResponseMetadata': {'RequestId': 'N5VTH3HFF7QVS6CB',
  'HostId': 'gA+RYzi83B86HWORFqFD/+uoycWzJANwvCJaOkWLBTbAgkCA4bJUagDoifnWZLAn40/5MRwr33c=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'gA+RYzi83B86HWORFqFD/+uoycWzJANwvCJaOkWLBTbAgkCA4bJUagDoifnWZLAn40/5MRwr33c=',
   'x-amz-request-id': 'N5VTH3HFF7QVS6CB',
   'date': 'Sun, 27 Mar 2022 22:37:50 GMT',
   'etag': '"fb2b9f58e44bbafa35dba2e8e378beb7"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"fb2b9f58e44bbafa35dba2e8e378beb7"'}

In [None]:
### Check that the file was created in your S3 bucket via the browser, or read it back with the next cell

In [25]:
# Low-level client used to download the object again.
s3 = boto3.client('s3', aws_access_key_id=AWS_KEY,
                  aws_secret_access_key=AWS_SECRET)

# Request the object under `filename` (the key used for the upload) rather
# than a hard-coded date, so the cell keeps working on any day and symbol.
response = s3.get_object(Bucket=bucket,
                         Key=filename)

# response['Body'] is passed straight to pandas, which reads the CSV from it.
s3csv = pd.read_csv(response['Body'])

In [26]:
# Display the DataFrame that was read back from S3.
s3csv

Unnamed: 0,ticker,interval,timestamp,open,high,low,close,volume
0,TSLA,1min,2022-03-25 20:00:00,1011.70,1011.75,1011.1500,1011.15,777
1,TSLA,1min,2022-03-25 19:58:00,1011.50,1011.50,1011.5000,1011.50,690
2,TSLA,1min,2022-03-25 19:56:00,1011.18,1011.18,1011.1800,1011.18,446
3,TSLA,1min,2022-03-25 19:55:00,1011.00,1011.00,1011.0000,1011.00,1246
4,TSLA,1min,2022-03-25 19:54:00,1010.90,1011.00,1010.9000,1011.00,1244
...,...,...,...,...,...,...,...,...
95,TSLA,1min,2022-03-25 17:00:00,1009.75,1009.75,1009.7499,1009.75,2447
96,TSLA,1min,2022-03-25 16:59:00,1009.80,1009.80,1009.7000,1009.70,1759
97,TSLA,1min,2022-03-25 16:58:00,1009.77,1009.80,1009.7200,1009.72,974
98,TSLA,1min,2022-03-25 16:57:00,1010.00,1010.00,1009.7500,1009.75,4539


# Example 2: Twitter API

In [None]:
import boto3
import json
from io import StringIO

import os

# Credentials are read from the environment so they are never stored in the
# notebook. When a variable is unset the value is None and boto3 falls back
# to its default credential chain (shared credentials file, IAM role, ...).
# NOTE: an access key that was ever committed in a notebook must be
# deactivated and rotated in the AWS console.
AWS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET = os.environ.get("AWS_SECRET_ACCESS_KEY")
REGION = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")

In [43]:
#we get this directly from Postman
import pandas as pd
import requests

tags = ['realmadrid', 'osasuna', 'valencia']
tag = 'realmadrid'

payload={}
headers = {
  'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAHZvWgEAAAAAc4YptgzgGSFs4Q4W%2BX0nZHhMCWo%3DmIQZ1WO7CXFeVrjm8C1jqg3AlqnTzYDISkx93a9hszVuGbjt4D',
  'Cookie': 'guest_id=v1%3A164559248260700812; guest_id_ads=v1%3A164559248260700812; guest_id_marketing=v1%3A164559248260700812; personalization_id="v1_2E6Q2uENbjcLzQ/+AZbsfg=="'
}

url = "https://api.twitter.com/2/tweets/counts/recent?query="+tag

response = requests.request("GET", url, headers=headers, data=payload)

response_json = response.json()

In [49]:
# Peek at the first two entries of the counts payload.
response_json['data'][:2]

[{'end': '2022-03-21T00:00:00.000Z',
  'start': '2022-03-20T23:21:42.000Z',
  'tweet_count': 3682},
 {'end': '2022-03-21T01:00:00.000Z',
  'start': '2022-03-21T00:00:00.000Z',
  'tweet_count': 3863}]

In [51]:
# Turn the list of count records into a DataFrame.
# Note: this rebinds `data`, which earlier held the Alpha Vantage JSON.
data = pd.json_normalize(response_json['data'])

In [52]:
# Display the first rows of the normalized counts.
data.head()

Unnamed: 0,end,start,tweet_count
0,2022-03-21T00:00:00.000Z,2022-03-20T23:21:42.000Z,3682
1,2022-03-21T01:00:00.000Z,2022-03-21T00:00:00.000Z,3863
2,2022-03-21T02:00:00.000Z,2022-03-21T01:00:00.000Z,2177
3,2022-03-21T03:00:00.000Z,2022-03-21T02:00:00.000Z,1718
4,2022-03-21T04:00:00.000Z,2022-03-21T03:00:00.000Z,1302


#### Now we are going to loop through each of the tags and add the features in the dataframe

In [113]:
# Fetch the recent tweet counts for every tag and stack them in `df_result`,
# adding a `date` column (first 10 characters of `end`) and a `query` column.
url = "https://api.twitter.com/2/tweets/counts/recent"
tag_frames = []

for tag in tags:
    # `params` lets requests URL-encode the query string.
    response = requests.get(url, headers=headers, params={'query': tag}, timeout=30)
    response.raise_for_status()

    response_json = response.json()

    # A separate name is used so the stock-price `df` built earlier in the
    # notebook is not overwritten.
    tag_counts = pd.json_normalize(response_json['data'])
    tag_counts['date'] = tag_counts['end'].str[:10]
    tag_counts['query'] = tag

    tag_frames.append(tag_counts)

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# Each frame keeps its own row labels, as the repeated append calls did.
df_result = pd.concat(tag_frames) if tag_frames else pd.DataFrame()


In [114]:
# Display the first rows of the combined result.
df_result.head()

Unnamed: 0,end,start,tweet_count,date,query
0,2022-03-21T01:00:00.000Z,2022-03-21T00:30:11.000Z,1709,2022-03-21,realmadrid
1,2022-03-21T02:00:00.000Z,2022-03-21T01:00:00.000Z,2177,2022-03-21,realmadrid
2,2022-03-21T03:00:00.000Z,2022-03-21T02:00:00.000Z,1718,2022-03-21,realmadrid
3,2022-03-21T04:00:00.000Z,2022-03-21T03:00:00.000Z,1302,2022-03-21,realmadrid
4,2022-03-21T05:00:00.000Z,2022-03-21T04:00:00.000Z,993,2022-03-21,realmadrid


In [115]:
# Remove the raw interval bounds in place; `date` and `query` remain.
df_result.drop(columns=['end', 'start'], inplace=True)

In [116]:
# Display the first rows after dropping `end` and `start`.
df_result.head()

Unnamed: 0,tweet_count,date,query
0,1709,2022-03-21,realmadrid
1,2177,2022-03-21,realmadrid
2,1718,2022-03-21,realmadrid
3,1302,2022-03-21,realmadrid
4,993,2022-03-21,realmadrid


In [119]:
# Count how many rows were collected for each query tag.
df_result['query'].value_counts()

realmadrid    169
osasuna       169
valencia      169
Name: query, dtype: int64

In [111]:
# Print the median tweet count per tag, in the order
# realmadrid, valencia, osasuna.
for team in ('realmadrid', 'valencia', 'osasuna'):
    team_rows = df_result[df_result['query'] == team]
    print(team_rows['tweet_count'].median())

653.0
1254.0
45.0


# Exercise: Save the Twitter data into S3

In [None]:
#1. Create a bucket called soccer-studentid (S3 bucket names cannot contain underscores; use lowercase letters, digits and hyphens)

In [None]:
#2. Construct a file name that includes the date and a csv extension

In [None]:
#3. Save into your S3 bucket

In [None]:
#4. Check that your file was saved in your bucket

#### Explore other APIs (the number of requests in the free versions is limited)

Go to: https://rapidapi.com/hub

# Any questions?