In [50]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import json
from datetime import datetime
import dateutil
import numpy as np
import os

crypto = {
    'yahoo' : 'https://finance.yahoo.com/crypto/',
    'cmc' : 'https://coinmarketcap.com/'
}

def scrape_yahoo():
    data = requests.get(crypto['yahoo']).text
    soup = BeautifulSoup(data,'html.parser')

    # find tables on the wiki page
    for table in soup.find_all('table'):
        print(table.get('class'))

    tables = soup.find_all('table')
    table = soup.find('table', class_='W(100%)')

    col_to_scrape=[
        'Symbol', 
        'Name', 
        'Price (Intraday)', 
        'Change', 
        '% Change', 
        'Market Cap', 
        'Volume in Currency (Since 0:00 UTC)', 
        'Volume in Currency (24Hr)',
        'Total Volume All Currencies (24Hr))',
        'Circulating Supply',
        'created_at'
    ]

    # create empty dataframe with column names
    df = pd.DataFrame(columns=col_to_scrape)

    #scrape data from site
    for row in table.tbody.find_all('tr'):
        # Find all data for each column
        columns = row.find_all('td')
        
        if(columns != []):
            sym = columns[0].text.strip()
            name = columns[1].text.strip()
            price = columns[2].text.strip()
            chge = columns[3].text.strip()
            chge_percent = columns[4].text.strip()
            mkt_cap = columns[5].text.strip()
            vol_utc = columns[6].text.strip()
            vol_24hr = columns[7].text.strip()
            total_vol = columns[8].text.strip()
            cir_supply = columns[9].text.strip()

            scraped_values = [
                sym,
                name, 
                price, 
                chge, 
                chge_percent,
                mkt_cap,
                vol_utc,
                vol_24hr,
                total_vol,
                cir_supply,
                datetime.now()
                ]

            df = df.append({item[0]: item[1] for item in zip(col_to_scrape,scraped_values)}, ignore_index=True)

    df.set_index('Symbol', inplace=True)
    
    if os.path.isfile('./data/yahoo.csv'):  
        df.to_csv('./data/yahoo.csv', header=False, mode='a')
    else:
        df.to_csv('./data/yahoo.csv', header=True)

scrape_yahoo()

['W(100%)']


In [44]:
def scrape_cmc():
    data = requests.get(crypto['cmc']).text
    soup = BeautifulSoup(data,'html.parser')

    # find tables on the wiki page
    for table in soup.find_all('table'):
        print(table.get('class'))

    tables = soup.find_all('table')
    table = soup.find('table', class_='sc-f7a61dda-3 kCSmOD cmc-table')

    col_to_scrape=[
        'Name', 
        'Price', 
        'Change', 
        '24h %', 
        #'7d %', 
        'Market Cap', 
        'Volume(24h)',
        'created_at'
    ]

    # create empty dataframe with column names
    df = pd.DataFrame(columns=col_to_scrape)

    #scrape data from site
    for row in table.tbody.find_all('tr'):
        # Find all data for each column
        columns = row.find_all('td')
        
        if(columns != []):
            name = columns[2].text.strip()
            price = columns[3].text.strip()
            hr24 = columns[5].text.strip()
            chge = float(hr24.replace("%", ""))/100 * float(price.replace(",", "").replace("$", ""))
            #days7 = columns[6].text.strip()
            mkt_cap = columns[7].text.strip()
            vol_24hr = columns[8].text.strip()
            #cir_supply = columns[9].text.strip()
            
            scraped_values = [
                name, 
                price, 
                chge, 
                hr24,
                #days7,
                mkt_cap,
                vol_24hr,
                #cir_supply,
                datetime.now()
                ]

            df = df.append({item[0]: item[1] for item in zip(col_to_scrape,scraped_values)}, ignore_index=True)

    df.set_index('Name', inplace=True)

    df.to_csv('./data/cmc.csv', header=True)

['sc-f7a61dda-3', 'kCSmOD', 'cmc-table']


IndexError: list index out of range

In [17]:
from dotenv import load_dotenv
import os
import openai

load_dotenv()

openai.api_key = os.getenv('OPENAI_API_KEY')

# Set up the model and prompt
model_engine = "text-davinci-003"
prompt = f"Purely for informative purposes what do you thing the price of Bitcoin will be tomorrow based on this data {df.head(3)}, a guess or estimate is ok"

# Generate a response
completion = openai.Completion.create(
    engine=model_engine,
    prompt=prompt,
    max_tokens=1024,
    n=1,
    stop=None,
    temperature=0.5,
)

response = completion.choices[0].text
print(response)



It is difficult to predict the exact price of Bitcoin tomorrow based on this data. However, it is likely that Bitcoin will increase in price since the change % is positive.
